scraperwiki 2.0.0 → 2.0.1
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- data/lib/scraperwiki/sqlite_save_info.rb +204 -0
- metadata +2 -1
data/lib/scraperwiki/sqlite_save_info.rb ADDED
@@ -0,0 +1,204 @@
+# Builds schemas automatically from a hash, for SQLite databases
+#
+# Ported from ScraperWiki Classic - scraperwiki/services/datastore/datalib.py
+# This will make the code quite unRubyish - it is Julian Todd's Python, ported.
+
+
+# TODO:
+# Sort out 'error' bits
+
+require 'set'
+require 'sqlite3'
+
+module SQLiteMagic
+  @db = SQLite3::Database.new("scraperwiki.sqlite")
+  @sqlitesaveinfo = {}
+
+  def SQLiteMagic._do_save_sqlite(unique_keys, data, swdatatblname)
+    res = { }
+    if data.class == Hash
+      data = [data]
+    end
+
+    if !@sqlitesaveinfo.include?(swdatatblname)
+      ssinfo = SqliteSaveInfo.new(swdatatblname, @db)
+      @sqlitesaveinfo[swdatatblname] = ssinfo
+      if not ssinfo.rebuildinfo() and data.length > 0
+        ssinfo.buildinitialtable(data[0])
+        ssinfo.rebuildinfo()
+        res["tablecreated"] = swdatatblname
+      end
+    else
+      ssinfo = @sqlitesaveinfo[swdatatblname]
+    end
+
+    @db.transaction()
+
+    nrecords = 0
+    data.each do |ldata|
+      newcols = ssinfo.newcolumns(ldata)
+      if newcols.length > 0
+        newcols.each_with_index do |kv, i|
+          ssinfo.addnewcolumn(kv[0], kv[1])
+          res["newcolumn %d" % i] = "%s %s" % kv
+        end
+        ssinfo.rebuildinfo()
+      end
+
+      if nrecords == 0 && unique_keys.length > 0
+        idxname, idxkeys = ssinfo.findclosestindex(unique_keys)
+        # puts "findclosestindex returned name:"+ idxname.to_s + " keys:" + idxkeys.to_s
+        if !idxname || idxkeys != unique_keys.to_set
+          lres = ssinfo.makenewindex(idxname, unique_keys)
+          if lres.include?('error')
+            return lres
+          end
+          res.merge!(lres)
+        end
+      end
+
+      lres = ssinfo.insertdata(ldata)
+      if lres.include?('error')
+        return lres
+      end
+      nrecords += 1
+    end
+
+    @db.commit()
+    # log(nrecords + " inserted or replaced")
+    return res
+  end
+
+
+  class SqliteSaveInfo
+    def initialize(swdatatblname, db)
+      @swdatatblname = swdatatblname
+      @swdatakeys = [ ]
+      @swdatatypes = [ ]
+      @sqdatatemplate = ""
+      @db = db
+    end
+
+    def rebuildinfo()
+      does_exist = @db.get_first_value("select count(*) from main.sqlite_master where name=?", @swdatatblname)
+      if does_exist == 0
+        return false
+      end
+
+      tblinfo = @db.execute("PRAGMA main.table_info(`%s`)" % @swdatatblname)
+      # puts "tblinfo="+ tblinfo.to_s
+      # there's a bug: PRAGMA main.table_info(swdata) returns the schema for otherdatabase.swdata
+      # following an attach otherdatabase where otherdatabase has a swdata and main does not
+
+      @swdatakeys = tblinfo.map { |a| a[1] }
+      @swdatatypes = tblinfo.map { |a| a[2] }
+      @sqdatatemplate = format("insert or replace into main.`%s` values (%s)", @swdatatblname, (["?"]*@swdatakeys.length).join(","))
+      return true
+    end
+
+
+    def buildinitialtable(data)
+      raise "buildinitialtable: no swdatakeys" unless @swdatakeys.length == 0
+      coldef = self.newcolumns(data)
+      raise "buildinitialtable: no coldef" unless coldef.length > 0
+      # coldef = coldef[:1]  # just put one column in; the rest could be altered -- to prove it's good
+      scoldef = coldef.map { |col| format("`%s` %s", col[0], col[1]) }.join(",")
+      # used to just add date_scraped in, but without it can't create an empty table
+      @db.execute(format("create table main.`%s` (%s)", @swdatatblname, scoldef))
+    end
+
+    def newcolumns(data)
+      newcols = [ ]
+      for k, v in data
+        if !@swdatakeys.include?(k)
+          if v != nil
+            #if k[-5:] == "_blob"
+            #  vt = "blob"  # coerced into affinity none
+            if v.class == Fixnum
+              vt = "integer"
+            elsif v.class == Float
+              vt = "real"
+            else
+              vt = "text"
+            end
+            newcols.push([k, vt])
+          end
+        end
+      end
+      # puts "newcols=" + newcols.to_s
+      return newcols
+    end
+
+    def addnewcolumn(k, vt)
+      @db.execute(format("alter table main.`%s` add column `%s` %s", @swdatatblname, k, vt))
+    end
+
+    def findclosestindex(unique_keys)
+      idxlist = @db.execute(format("PRAGMA main.index_list(`%s`)", @swdatatblname))  # [seq,name,unique]
+      # puts "findclosestindex: idxlist is "+ idxlist.to_s
+      if idxlist.include?('error')
+        return [nil, nil]
+      end
+
+      uniqueindexes = [ ]
+      for idxel in idxlist
+        if idxel[2]
+          idxname = idxel[1]
+          idxinfo = @db.execute(format("PRAGMA main.index_info(`%s`)", idxname))  # [seqno,cid,name]
+          idxset = idxinfo.map { |a| a[2] }.to_set
+          idxoverlap = idxset.intersection(unique_keys).length
+          uniqueindexes.push([idxoverlap, idxname, idxset])
+        end
+      end
+
+      if uniqueindexes.length == 0
+        return [nil, nil]
+      end
+      uniqueindexes.sort()
+      # puts "uniqueindexes=" + uniqueindexes.to_s
+      return [uniqueindexes[-1][1], uniqueindexes[-1][2]]
+    end
+
+    # increment to next index number every time there is a change, and add the new index before dropping the old one.
+    def makenewindex(idxname, unique_keys)
+      istart = 0
+      if idxname
+        mnum = re.search("(\d+)$", idxname)
+        if mnum
+          istart = int(mnum.group(1))
+        end
+      end
+      for i in 0..10000
+        newidxname = format("%s_index%d", @swdatatblname, istart+i)
+        does_exist = @db.get_first_value("select count(*) from main.sqlite_master where name=?", newidxname)
+        if does_exist == 0
+          break
+        end
+      end
+
+      res = { "newindex" => newidxname }
+      lres = @db.execute(format("create unique index `%s` on `%s` (%s)", newidxname, @swdatatblname, unique_keys.map { |k| format("`%s`", k) }.join(",")))
+      if lres.include?('error')
+        return lres
+      end
+      if idxname
+        lres = @db.execute(format("drop index main.`%s`", idxname))
+        if lres.include?('error')
+          if lres['error'] != 'sqlite3.Error: index associated with UNIQUE or PRIMARY KEY constraint cannot be dropped'
+            return lres
+          end
+        end
+        res["droppedindex"] = idxname
+      end
+      return res
+    end
+
+    def insertdata(data)
+      values = @swdatakeys.map { |k| data[k] }  # this was data.get(k) in Python
+      return @db.query(@sqdatatemplate, values)
+    end
+  end
+
+end
+
+
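A note on the hunk above: `makenewindex` still contains two lines of un-ported Python (`re.search("(\d+)$", idxname)` and `int(mnum.group(1))`), which would raise a NameError in Ruby whenever `idxname` is non-nil, i.e. whenever an existing unique index has to be replaced; the common first-save path (`idxname` nil) never reaches them. The following is a minimal sketch of what a Ruby equivalent might look like, for illustration only - it is not what ships in 2.0.1, and `next_index_start` is a hypothetical helper name:

    # Hypothetical Ruby port of the Python remnant in makenewindex:
    # pull a trailing number off an existing index name so the replacement
    # index can be numbered after it.
    def next_index_start(idxname)
      return 0 unless idxname
      mnum = idxname.match(/(\d+)$/)   # String#match instead of Python's re.search
      mnum ? mnum[1].to_i : 0          # to_i instead of Python's int()
    end

    next_index_start("swdata_index3")  # => 3
    next_index_start(nil)              # => 0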
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scraperwiki
 version: !ruby/object:Gem::Version
-  version: 2.0.0
+  version: 2.0.1
 prerelease:
 platform: ruby
 authors:
@@ -18,6 +18,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - lib/scraperwiki.rb
+- lib/scraperwiki/sqlite_save_info.rb
 homepage: http://rubygems.org/gems/scraperwiki
 licenses: []
 post_install_message:
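The metadata change just registers the new file with the gem so it is installed alongside lib/scraperwiki.rb. A minimal sketch of how the new module might be exercised once the gem is installed - the require path follows the files list above, and the table name, keys and record are made-up examples:

    # Loading this file opens ./scraperwiki.sqlite immediately
    # (the module body creates the SQLite3::Database at load time).
    require 'scraperwiki/sqlite_save_info'

    # Save one hypothetical record into a table named "swdata", keyed on "id".
    # On first use this creates the table and a unique index, then performs an
    # "insert or replace" for the row.
    record = { "id" => 1, "name" => "example" }
    res = SQLiteMagic._do_save_sqlite(["id"], record, "swdata")
    # res is a Hash describing what happened,
    # e.g. {"tablecreated"=>"swdata", "newindex"=>"swdata_index0"}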