scraperwiki 2.0.0 → 2.0.1
- data/lib/scraperwiki/sqlite_save_info.rb +204 -0
- metadata +2 -1
data/lib/scraperwiki/sqlite_save_info.rb ADDED
@@ -0,0 +1,204 @@
+# Builds schemas automatically from a hash, for SQLite databases
+#
+# Ported from ScraperWiki Classic - scraperwiki/services/datastore/datalib.py
+# This will make the code quite unRubyish - it is Julian Todd's Python, ported.
+
+
+# TODO:
+# Sort out 'error' bits
+
+require 'set'
+require 'sqlite3'
+
+module SQLiteMagic
+  @db = SQLite3::Database.new("scraperwiki.sqlite")
+  @sqlitesaveinfo = {}
+
+  def SQLiteMagic._do_save_sqlite(unique_keys, data, swdatatblname)
+    res = {}
+    if data.class == Hash
+      data = [data]
+    end
+
+    if !@sqlitesaveinfo.include?(swdatatblname)
+      ssinfo = SqliteSaveInfo.new(swdatatblname, @db)
+      @sqlitesaveinfo[swdatatblname] = ssinfo
+      if not ssinfo.rebuildinfo() and data.length > 0
+        ssinfo.buildinitialtable(data[0])
+        ssinfo.rebuildinfo()
+        res["tablecreated"] = swdatatblname
+      end
+    else
+      ssinfo = @sqlitesaveinfo[swdatatblname]
+    end
+
+    @db.transaction()
+
+    nrecords = 0
+    data.each do |ldata|
+      newcols = ssinfo.newcolumns(ldata)
+      if newcols.length > 0
+        newcols.each_with_index do |kv, i|
+          ssinfo.addnewcolumn(kv[0], kv[1])
+          res["newcolumn %d" % i] = "%s %s" % kv
+        end
+        ssinfo.rebuildinfo()
+      end
+
+      if nrecords == 0 && unique_keys.length > 0
+        idxname, idxkeys = ssinfo.findclosestindex(unique_keys)
+        # puts "findclosestindex returned name:" + idxname.to_s + " keys:" + idxkeys.to_s
+        if !idxname || idxkeys != unique_keys.to_set
+          lres = ssinfo.makenewindex(idxname, unique_keys)
+          if lres.include?('error')
+            return lres
+          end
+          res.merge!(lres)
+        end
+      end
+
+      lres = ssinfo.insertdata(ldata)
+      if lres.include?('error')
+        return lres
+      end
+      nrecords += 1
+    end
+
+    @db.commit()
+    # log(nrecords + " inserted or replaced")
+    return res
+  end
+
+
+  class SqliteSaveInfo
+    def initialize(swdatatblname, db)
+      @swdatatblname = swdatatblname
+      @swdatakeys = []
+      @swdatatypes = []
+      @sqdatatemplate = ""
+      @db = db
+    end
+
+    def rebuildinfo()
+      does_exist = @db.get_first_value("select count(*) from main.sqlite_master where name=?", @swdatatblname)
+      if does_exist == 0
+        return false
+      end
+
+      tblinfo = @db.execute("PRAGMA main.table_info(`%s`)" % @swdatatblname)
+      # puts "tblinfo=" + tblinfo.to_s
+      # there's a bug: PRAGMA main.table_info(swdata) returns the schema for otherdatabase.swdata
+      # following an attach otherdatabase where otherdatabase has a swdata and main does not
+
+      @swdatakeys = tblinfo.map { |a| a[1] }
+      @swdatatypes = tblinfo.map { |a| a[2] }
+      @sqdatatemplate = format("insert or replace into main.`%s` values (%s)", @swdatatblname, (["?"] * @swdatakeys.length).join(","))
+      return true
+    end
+
+
+    def buildinitialtable(data)
+      raise "buildinitialtable: no swdatakeys" unless @swdatakeys.length == 0
+      coldef = self.newcolumns(data)
+      raise "buildinitialtable: no coldef" unless coldef.length > 0
+      # coldef = coldef[:1]  # just put one column in; the rest could be altered -- to prove it's good
+      scoldef = coldef.map { |col| format("`%s` %s", col[0], col[1]) }.join(",")
+      # used to just add date_scraped in, but without it can't create an empty table
+      @db.execute(format("create table main.`%s` (%s)", @swdatatblname, scoldef))
+    end
+
+    def newcolumns(data)
+      newcols = []
+      for k, v in data
+        if !@swdatakeys.include?(k)
+          if v != nil
+            # if k[-5:] == "_blob"
+            #   vt = "blob"  # coerced into affinity none
+            if v.class == Fixnum
+              vt = "integer"
+            elsif v.class == Float
+              vt = "real"
+            else
+              vt = "text"
+            end
+            newcols.push([k, vt])
+          end
+        end
+      end
+      # puts "newcols=" + newcols.to_s
+      return newcols
+    end
+
+    def addnewcolumn(k, vt)
+      @db.execute(format("alter table main.`%s` add column `%s` %s", @swdatatblname, k, vt))
+    end
+
+    def findclosestindex(unique_keys)
+      idxlist = @db.execute(format("PRAGMA main.index_list(`%s`)", @swdatatblname))  # [seq,name,unique]
+      # puts "findclosestindex: idxlist is " + idxlist.to_s
+      if idxlist.include?('error')
+        return [nil, nil]
+      end
+
+      uniqueindexes = []
+      for idxel in idxlist
+        if idxel[2]
+          idxname = idxel[1]
+          idxinfo = @db.execute(format("PRAGMA main.index_info(`%s`)", idxname))  # [seqno,cid,name]
+          idxset = idxinfo.map { |a| a[2] }.to_set
+          idxoverlap = idxset.intersection(unique_keys).length
+          uniqueindexes.push([idxoverlap, idxname, idxset])
+        end
+      end
+
+      if uniqueindexes.length == 0
+        return [nil, nil]
+      end
+      uniqueindexes.sort!  # sort in place; a bare sort() would discard the result
+      # puts "uniqueindexes=" + uniqueindexes.to_s
+      return [uniqueindexes[-1][1], uniqueindexes[-1][2]]
+    end
+
+    # increment to next index number every time there is a change, and add the new index before dropping the old one.
+    def makenewindex(idxname, unique_keys)
+      istart = 0
+      if idxname
+        mnum = idxname.match(/(\d+)$/)  # was Python's re.search in the port
+        if mnum
+          istart = mnum[1].to_i
+        end
+      end
+      for i in 0..10000
+        newidxname = format("%s_index%d", @swdatatblname, istart + i)
+        does_exist = @db.get_first_value("select count(*) from main.sqlite_master where name=?", newidxname)
+        if does_exist == 0
+          break
+        end
+      end
+
+      res = { "newindex" => newidxname }
+      lres = @db.execute(format("create unique index `%s` on `%s` (%s)", newidxname, @swdatatblname, unique_keys.map { |k| format("`%s`", k) }.join(",")))
+      if lres.include?('error')
+        return lres
+      end
+      if idxname
+        lres = @db.execute(format("drop index main.`%s`", idxname))
+        if lres.include?('error')
+          if lres['error'] != 'sqlite3.Error: index associated with UNIQUE or PRIMARY KEY constraint cannot be dropped'
+            return lres
+          end
+        end
+        res["droppedindex"] = idxname
+      end
+      return res
+    end
+
+    def insertdata(data)
+      values = @swdatakeys.map { |k| data[k] }  # this was data.get(k) in Python
+      return @db.query(@sqdatatemplate, values)
+    end
+  end
+
+end
+
+
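The new module is the schema-building back end for the gem's SQLite saving: given an array of hashes (or a single hash) plus the unique-key column names, it creates the table on first use, adds any columns it has not seen before, and keeps a unique index so repeated saves replace rows rather than duplicate them. A minimal sketch of driving it directly follows; the table name and records are illustrative only, not part of the gem.

    require 'scraperwiki/sqlite_save_info'

    # Column types are inferred from the Ruby values:
    # Fixnum -> integer, Float -> real, everything else -> text.
    # The "notes" column only appears in the second record, so it is
    # added to the table on the fly.
    records = [
      { "id" => 1, "name" => "Alice", "score" => 9.5 },
      { "id" => 2, "name" => "Bob",   "score" => 7.25, "notes" => "late entry" }
    ]

    # Writes to ./scraperwiki.sqlite (opened when the module is loaded).
    # Rows sharing the unique key "id" are replaced on subsequent saves.
    SQLiteMagic._do_save_sqlite(["id"], records, "swdata")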
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scraperwiki
 version: !ruby/object:Gem::Version
-  version: 2.0.0
+  version: 2.0.1
 prerelease:
 platform: ruby
 authors:
@@ -18,6 +18,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - lib/scraperwiki.rb
+- lib/scraperwiki/sqlite_save_info.rb
 homepage: http://rubygems.org/gems/scraperwiki
 licenses: []
 post_install_message: