scraperwiki 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ # Builds schemas automatically from a hash, for SQLite databases
2
+ #
3
+ # Ported from ScraperWiki Classic - scraperwiki/services/datastore/datalib.py
4
+ # This will make the code quite unRubyish - it is Julian Todd's Python, ported.
5
+
6
+
7
+ # TODO:
8
+ # Sort out 'error' bits
9
+
10
+ require 'set'
11
+ require 'sqlite3'
12
+
13
+ module SQLiteMagic
14
+ @db = SQLite3::Database.new("scraperwiki.sqlite")
15
+ @sqlitesaveinfo = {}
16
+
17
# Saves one record, or a list of records, into the table +swdatatblname+ of
# the module's database, creating the table, adding columns and replacing the
# unique index as required.
#
# unique_keys   - list of column names that identify a record; when it is not
#                 empty the table's unique index is made to match it before
#                 the first record is inserted.
# data          - a Hash (column name => value) or an Array of such Hashes.
# swdatatblname - name of the destination table.
#
# Returns a Hash describing the schema changes that were made (keys
# "tablecreated", "newcolumn N", "newindex", "droppedindex"). If a helper hands
# back a result containing 'error', the transaction is rolled back and that
# result is returned instead.
#
# Any exception raised while saving rolls the transaction back and is
# re-raised, so the connection is never left inside an open transaction.
def SQLiteMagic._do_save_sqlite(unique_keys, data, swdatatblname)
  res = {}
  data = [data] if data.is_a?(Hash)

  ssinfo = @sqlitesaveinfo[swdatatblname]
  unless ssinfo
    ssinfo = SqliteSaveInfo.new(swdatatblname, @db)
    unless ssinfo.rebuildinfo
      # No table yet and nothing to derive its columns from: do not cache the
      # helper, so a later call with real data still creates the table.
      return res if data.empty?

      ssinfo.buildinitialtable(data[0])
      ssinfo.rebuildinfo
      res["tablecreated"] = swdatatblname
    end
    # Cache only once the table is known to exist.
    @sqlitesaveinfo[swdatatblname] = ssinfo
  end

  # Undo the transaction and forget the cached schema, which may mention
  # columns or indexes that the rollback has just removed.
  abandon = lambda do
    @db.rollback if @db.transaction_active?
    @sqlitesaveinfo.delete(swdatatblname)
  end

  @db.transaction
  begin
    data.each_with_index do |ldata, nrecords|
      newcols = ssinfo.newcolumns(ldata)
      unless newcols.empty?
        newcols.each_with_index do |kv, i|
          ssinfo.addnewcolumn(kv[0], kv[1])
          res["newcolumn %d" % i] = "%s %s" % kv
        end
        ssinfo.rebuildinfo
      end

      # The unique index only needs checking once per call, before the first
      # insert, after any new columns it refers to have been added.
      if nrecords.zero? && !unique_keys.empty?
        idxname, idxkeys = ssinfo.findclosestindex(unique_keys)
        if !idxname || idxkeys != unique_keys.to_set
          lres = ssinfo.makenewindex(idxname, unique_keys)
          if lres.include?('error')
            abandon.call
            return lres
          end
          res.merge!(lres)
        end
      end

      lres = ssinfo.insertdata(ldata)
      if lres.include?('error')
        abandon.call
        return lres
      end
    end
    @db.commit
  rescue StandardError
    abandon.call
    raise
  end

  res
end
71
+
72
+
73
# Keeps track of the columns of one table in the main SQLite database and
# issues the statements needed to create the table, add columns, maintain a
# unique index and insert rows.
class SqliteSaveInfo
  # swdatatblname - name of the table this object manages.
  # db            - database handle; this class calls +execute+ and
  #                 +get_first_value+ on it.
  def initialize(swdatatblname, db)
    @swdatatblname = swdatatblname
    @swdatakeys = []
    @swdatatypes = []
    @sqdatatemplate = ""
    @db = db
  end

  # Reloads the column names, column types and the insert statement from the
  # database.
  #
  # Returns false when the table does not exist, true otherwise.
  def rebuildinfo
    return false unless inmaster?(@swdatatblname)

    tblinfo = @db.execute(format("PRAGMA main.table_info(`%s`)", @swdatatblname))
    # there's a bug: PRAGMA main.table_info(swdata) returns the schema for otherdatabase.swdata
    # following an attach otherdatabase where otherdatabase has a swdata and main does not

    @swdatakeys = tblinfo.map { |a| a[1] }
    @swdatatypes = tblinfo.map { |a| a[2] }
    placeholders = (["?"] * @swdatakeys.length).join(",")
    @sqdatatemplate = format("insert or replace into main.`%s` values (%s)", @swdatatblname, placeholders)
    true
  end

  # Creates the table with one column per non-nil entry of +data+.
  #
  # Raises RuntimeError if columns are already known for the table, or if
  # +data+ yields no column at all (an empty table cannot be created).
  def buildinitialtable(data)
    raise "buildinitialtable: no swdatakeys" unless @swdatakeys.empty?

    coldef = newcolumns(data)
    raise "buildinitialtable: no coldef" if coldef.empty?

    scoldef = coldef.map { |col| format("`%s` %s", col[0], col[1]) }.join(",")
    @db.execute(format("create table main.`%s` (%s)", @swdatatblname, scoldef))
  end

  # Lists the columns that +data+ needs but the table does not have yet.
  #
  # Entries whose value is nil are skipped because no type can be derived
  # from them. Every Integer (of any size) maps to "integer", Float to "real"
  # and anything else to "text".
  #
  # Returns an Array of [column name, column type] pairs.
  def newcolumns(data)
    newcols = []
    data.each do |k, v|
      next if v.nil? || @swdatakeys.include?(k)

      vt = case v
           when Integer then "integer"
           when Float then "real"
           else "text"
           end
      newcols.push([k, vt])
    end
    newcols
  end

  # Adds column +k+ of type +vt+ to the table.
  def addnewcolumn(k, vt)
    @db.execute(format("alter table main.`%s` add column `%s` %s", @swdatatblname, k, vt))
  end

  # Finds the unique index of the table that shares the most columns with
  # +unique_keys+; ties are broken by the greater index name.
  #
  # Returns [index name, Set of indexed column names], or [nil, nil] when the
  # table has no unique index.
  def findclosestindex(unique_keys)
    idxlist = @db.execute(format("PRAGMA main.index_list(`%s`)", @swdatatblname)) # [seq,name,unique,...]

    # The unique flag is reported as 0 or 1; 0 is truthy in Ruby, so it has to
    # be compared explicitly.
    uniquerows = idxlist.select { |idxel| idxel[2].to_s == "1" }
    uniqueindexes = uniquerows.map do |idxel|
      idxname = idxel[1]
      idxinfo = @db.execute(format("PRAGMA main.index_info(`%s`)", idxname)) # [seqno,cid,name]
      idxset = idxinfo.map { |a| a[2] }.to_set
      [idxset.intersection(unique_keys).length, idxname, idxset]
    end
    return [nil, nil] if uniqueindexes.empty?

    # Rank on overlap and name only, so the Sets never have to be compared.
    best = uniqueindexes.max_by { |overlap, name, _cols| [overlap, name] }
    [best[1], best[2]]
  end

  # Creates a unique index over +unique_keys+ and then drops the index
  # +idxname+ (if given) that it supersedes. The new index is created before
  # the old one is dropped, and its number is the first free one at or after
  # the number that ends +idxname+.
  #
  # Returns a Hash with "newindex" and, when the old index was really
  # dropped, "droppedindex". An old index that backs a UNIQUE or PRIMARY KEY
  # constraint cannot be dropped; that refusal is tolerated and the index is
  # simply left in place.
  def makenewindex(idxname, unique_keys)
    istart = idxname ? idxname[/(\d+)\z/, 1].to_i : 0

    newidxname = nil
    (istart..istart + 10_000).each do |n|
      newidxname = format("%s_index%d", @swdatatblname, n)
      break unless inmaster?(newidxname)
    end

    res = { "newindex" => newidxname }
    keylist = unique_keys.map { |k| format("`%s`", k) }.join(",")
    @db.execute(format("create unique index `%s` on `%s` (%s)", newidxname, @swdatatblname, keylist))

    if idxname
      begin
        @db.execute(format("drop index main.`%s`", idxname))
        res["droppedindex"] = idxname
      rescue SQLite3::SQLException => e
        raise unless e.message.include?('index associated with UNIQUE or PRIMARY KEY constraint cannot be dropped')
      end
    end
    res
  end

  # Inserts (or replaces) one row; columns missing from +data+ are stored as
  # NULL. Uses +execute+ so that no open result set is left behind.
  #
  # Returns whatever +execute+ returns for the insert statement.
  def insertdata(data)
    values = @swdatakeys.map { |k| data[k] } # this was data.get(k) in Python
    @db.execute(@sqdatatemplate, values)
  end

  private

  # True when main.sqlite_master has an entry called +name+.
  def inmaster?(name)
    count = @db.get_first_value("select count(*) from main.sqlite_master where name=?", name)
    count.to_i != 0
  end
end
201
+
202
+ end
203
+
204
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraperwiki
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -18,6 +18,7 @@ extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/scraperwiki.rb
21
+ - lib/scraperwiki/sqlite_save_info.rb
21
22
  homepage: http://rubygems.org/gems/scraperwiki
22
23
  licenses: []
23
24
  post_install_message: