scraperwiki 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,204 @@
1
+ # Builds schemas automatically from a hash, for SQLite databases
2
+ #
3
+ # Ported from ScraperWiki Classic - scraperwiki/services/datastore/datalib.py
4
+ # This will make the code quite unRubyish - it is Julian Todd's Python, ported.
5
+
6
+
7
+ # TODO:
8
+ # Sort out 'error' bits
9
+
10
+ require 'set'
11
+ require 'sqlite3'
12
+
13
+ module SQLiteMagic
14
+ @db = SQLite3::Database.new("scraperwiki.sqlite")
15
+ @sqlitesaveinfo = {}
16
+
17
# Saves one row (a Hash) or several rows (an Array of Hashes) into the table
# named swdatatblname, creating the table, adding missing columns and
# (re)building the unique index where necessary.
#
# unique_keys   - collection of column names that a unique index must cover;
#                 when empty, indexes are left alone.
# data          - Hash, or Array of Hashes, mapping column name => value.
# swdatatblname - name of the target table in the main database.
#
# Returns a Hash describing the schema changes that were made. Possible keys:
# "tablecreated", "newcolumn N", "newindex", "droppedindex". If a helper
# returns a result containing an 'error' entry, the transaction is rolled back
# and that result is returned instead.
#
# Any exception raised while saving rolls the transaction back and is re-raised,
# so a failed save never leaves a transaction open on the shared connection.
def SQLiteMagic._do_save_sqlite(unique_keys, data, swdatatblname)
  res = {}
  data = [data] if data.is_a?(Hash)

  ssinfo = @sqlitesaveinfo[swdatatblname]
  unless ssinfo
    ssinfo = SqliteSaveInfo.new(swdatatblname, @db)
    unless ssinfo.rebuildinfo
      # No table and nothing to derive a schema from: do not cache the info
      # object, otherwise a later call would skip table creation and try to
      # "alter table" on a table that does not exist.
      return res if data.empty?

      ssinfo.buildinitialtable(data[0])
      ssinfo.rebuildinfo
      res["tablecreated"] = swdatatblname
    end
    @sqlitesaveinfo[swdatatblname] = ssinfo
  end

  # Running counter so that columns added for later records do not overwrite
  # the "newcolumn N" entries reported for earlier ones.
  newcolcount = 0

  @db.transaction
  begin
    data.each_with_index do |ldata, nrecords|
      newcols = ssinfo.newcolumns(ldata)
      unless newcols.empty?
        newcols.each do |name, type|
          ssinfo.addnewcolumn(name, type)
          res["newcolumn %d" % newcolcount] = "%s %s" % [name, type]
          newcolcount += 1
        end
        ssinfo.rebuildinfo
      end

      # The unique index only needs checking once per call, before the first insert.
      if nrecords.zero? && !unique_keys.empty?
        idxname, idxkeys = ssinfo.findclosestindex(unique_keys)
        if !idxname || idxkeys != unique_keys.to_set
          lres = ssinfo.makenewindex(idxname, unique_keys)
          if lres.include?('error')
            @db.rollback
            return lres
          end
          res.merge!(lres)
        end
      end

      lres = ssinfo.insertdata(ldata)
      if lres.include?('error')
        @db.rollback
        return lres
      end
    end
    @db.commit
  rescue StandardError
    # The rollback above may already have ended the transaction.
    @db.rollback if @db.transaction_active?
    raise
  end

  res
end
71
+
72
+
73
# Tracks the schema of one table in the main database and issues the SQL
# needed to create the table, add columns, maintain a unique index and insert
# rows.
#
# The database handle only needs to respond to get_first_value(sql, *binds)
# and execute(sql, *binds); failures are expected to surface as exceptions
# raised by that handle.
class SqliteSaveInfo
  # Message SQLite gives when asked to drop an index that backs a UNIQUE or
  # PRIMARY KEY constraint; such an index is simply left in place.
  UNDROPPABLE_INDEX_MESSAGE =
    'index associated with UNIQUE or PRIMARY KEY constraint cannot be dropped'.freeze

  # swdatatblname - name of the table this object manages.
  # db            - database handle used for every statement.
  def initialize(swdatatblname, db)
    @swdatatblname = swdatatblname
    @swdatakeys = []
    @swdatatypes = []
    @sqdatatemplate = ""
    @db = db
  end

  # Reloads the column names/types of the table and rebuilds the insert
  # statement template.
  #
  # Returns false when the table does not exist, true otherwise.
  def rebuildinfo
    return false unless name_exists?(@swdatatblname)

    tblinfo = @db.execute(format("PRAGMA main.table_info(%s)", quote_identifier(@swdatatblname)))
    # there's a bug: PRAGMA main.table_info(swdata) returns the schema for otherdatabase.swdata
    # following an attach otherdatabase where otherdatabase has a swdata and main does not

    @swdatakeys = tblinfo.map { |row| row[1] }
    @swdatatypes = tblinfo.map { |row| row[2] }
    placeholders = (["?"] * @swdatakeys.length).join(",")
    @sqdatatemplate = format("insert or replace into main.%s values (%s)",
                             quote_identifier(@swdatatblname), placeholders)
    true
  end

  # Creates the table with one column per non-nil entry of data.
  #
  # Raises RuntimeError if columns are already known for this table, or if
  # data yields no column definitions (every value nil, or data empty).
  def buildinitialtable(data)
    raise "buildinitialtable: no swdatakeys" unless @swdatakeys.empty?

    coldef = newcolumns(data)
    raise "buildinitialtable: no coldef" if coldef.empty?

    # used to just add date_scraped in, but without it can't create an empty table
    scoldef = coldef.map { |name, type| format("%s %s", quote_identifier(name), type) }.join(",")
    @db.execute(format("create table main.%s (%s)", quote_identifier(@swdatatblname), scoldef))
  end

  # Lists the columns present in data but not yet known for the table.
  #
  # Entries whose value is nil are skipped, since no type can be inferred.
  # Returns an Array of [column_name, sql_type] pairs where sql_type is
  # "integer" (any Integer), "real" (Float) or "text" (everything else).
  def newcolumns(data)
    data.each_with_object([]) do |(k, v), newcols|
      next if v.nil? || @swdatakeys.include?(k)

      vt = case v
           when Integer then "integer"
           when Float then "real"
           else "text"
           end
      newcols.push([k, vt])
    end
  end

  # Adds column k with SQL type vt to the table.
  def addnewcolumn(k, vt)
    @db.execute(format("alter table main.%s add column %s %s",
                       quote_identifier(@swdatatblname), quote_identifier(k), vt))
  end

  # Finds the unique index whose columns overlap most with unique_keys.
  #
  # Returns [index_name, Set of indexed column names], or [nil, nil] when the
  # table has no unique index. Ties on overlap go to the greatest index name.
  def findclosestindex(unique_keys)
    wanted = unique_keys.to_set
    idxlist = @db.execute(format("PRAGMA main.index_list(%s)", quote_identifier(@swdatatblname))) # [seq,name,unique]

    # The "unique" flag is numeric, and 0 is truthy in Ruby, so compare explicitly.
    candidates = idxlist.reject { |row| row[2].to_i.zero? }.map do |row|
      idxname = row[1]
      idxinfo = @db.execute(format("PRAGMA main.index_info(%s)", quote_identifier(idxname))) # [seqno,cid,name]
      idxset = idxinfo.map { |info| info[2] }.to_set
      [(idxset & wanted).length, idxname, idxset]
    end
    return [nil, nil] if candidates.empty?

    _overlap, bestname, bestset = candidates.max_by { |overlap, name, _set| [overlap, name.to_s] }
    [bestname, bestset]
  end

  # Creates a unique index over unique_keys and then drops the index idxname
  # (if given). The new index takes the first unused name of the form
  # "<table>_index<N>", counting up from the number at the end of idxname.
  #
  # Returns a Hash with "newindex" and, when the old index was actually
  # dropped, "droppedindex".
  # Raises RuntimeError when no unused index name is found; other failures
  # propagate from the database handle.
  def makenewindex(idxname, unique_keys)
    # nil.to_i is 0, so a missing name or a name without a numeric suffix starts at 0.
    istart = idxname ? idxname[/(\d+)\z/, 1].to_i : 0

    newidxname = (0..10_000).lazy
                            .map { |i| format("%s_index%d", @swdatatblname, istart + i) }
                            .find { |candidate| !name_exists?(candidate) }
    raise "makenewindex: no free index name for #{@swdatatblname}" unless newidxname

    res = { "newindex" => newidxname }
    columns = unique_keys.map { |k| quote_identifier(k) }.join(",")
    @db.execute(format("create unique index %s on %s (%s)",
                       quote_identifier(newidxname), quote_identifier(@swdatatblname), columns))

    if idxname
      begin
        @db.execute(format("drop index main.%s", quote_identifier(idxname)))
        res["droppedindex"] = idxname
      rescue StandardError => e
        # An index backing a table constraint cannot be dropped; keep it and carry on.
        raise unless e.message.include?(UNDROPPABLE_INDEX_MESSAGE)
      end
    end
    res
  end

  # Inserts (or replaces) one row. Columns missing from data are stored as nil.
  #
  # Returns whatever the handle's execute returns for the insert statement.
  def insertdata(data)
    values = @swdatakeys.map { |k| data[k] } # this was data.get(k) in Python
    @db.execute(@sqdatatemplate, values)
  end

  private

  # True when sqlite_master of the main database has an entry called name.
  def name_exists?(name)
    @db.get_first_value("select count(*) from main.sqlite_master where name=?", name).to_i > 0
  end

  # Wraps an identifier in backticks, doubling embedded backticks so a table or
  # column name taken from saved data cannot terminate the quoted identifier.
  def quote_identifier(name)
    "`#{name.to_s.gsub('`', '``')}`"
  end
end
201
+
202
+ end
203
+
204
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraperwiki
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -18,6 +18,7 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/scraperwiki.rb
21
+ - lib/scraperwiki/sqlite_save_info.rb
21
22
  homepage: http://rubygems.org/gems/scraperwiki
22
23
  licenses: []
23
24
  post_install_message: