scraperwiki 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scraperwiki.rb +480 -0
- data/lib/scraperwiki/datastore.rb +109 -0
- data/lib/scraperwiki/stacktrace.rb +51 -0
- data/lib/version.rb +4 -0
- metadata +98 -0
data/lib/scraperwiki.rb
ADDED
@@ -0,0 +1,480 @@
require 'json'
require 'date'
require 'uri'
require 'net/http'
require 'scraperwiki/datastore'
require 'httpclient'

class SqliteException < RuntimeError
end

class NoSuchTableSqliteException < SqliteException
end

$apiwrapperattacheddata = [ ]

module ScraperWiki

  $metadatamessagedone = false
  $attachlist = [ ]

  def ScraperWiki.dumpMessage(hash)
    msg = JSON.generate(hash)
    $logfd.write("JSONRECORD(" + msg.length.to_s() + "):" + msg + "\n")
    $logfd.flush()
  end

  def ScraperWiki.httpresponseheader(headerkey, headervalue)
    ScraperWiki.dumpMessage({'message_type' => 'httpresponseheader', 'headerkey' => headerkey, 'headervalue' => headervalue})
  end

  def ScraperWiki.scrape(url, params = nil)
    client = HTTPClient.new
    client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE

    if params.nil?
      return client.get_content(url)
    else
      return client.post_content(url, params)
    end
  end

  def ScraperWiki.gb_postcode_to_latlng(postcode)
    uri = URI.parse("http://views.scraperwiki.com/run/uk_postcode_lookup/?postcode=" + URI.escape(postcode))
    sres = Net::HTTP.get(uri)
    jres = JSON.parse(sres)
    if jres["lat"] and jres["lng"]
      return [jres["lat"], jres["lng"]]
    end
    return nil
  end

  # Stops 2-byte unicode characters from being chopped in half, which kills the JSON serializer
  def ScraperWiki._unicode_truncate(string, size)
    string.scan(/./u)[0, size].join
  end

  def ScraperWiki.save(unique_keys, data, date=nil, latlng=nil, table_name="swdata")
    if unique_keys != nil && !unique_keys.kind_of?(Array)
      raise 'unique_keys must be nil or an array'
    end
    if data == nil
      raise 'data must have a non-nil value'
    end

    ds = SW_DataStore.create()
    ldata = data.dup
    if date != nil
      ldata["date"] = date
    end
    if latlng != nil
      ldata["latlng_lat"] = latlng[0]
      ldata["latlng_lng"] = latlng[1]
    end
    return ScraperWiki.save_sqlite(unique_keys, ldata, table_name, 2)
  end

  # Run a SQL statement, either through the old dataproxy socket (webstore port 0)
  # or the newer webstore HTTP interface.
  def ScraperWiki.sqliteexecute(sqlquery, data=nil, verbose=2)
    ds = SW_DataStore.create()
    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'sqliteexecute', 'sqlquery'=>sqlquery, 'data'=>data, 'attachlist'=>$attachlist})
    else
      username = 'resourcedir'   # gets it into the right subdirectory automatically!!!
      dirscrapername = ds.m_scrapername
      if ds.m_scrapername == '' or ds.m_scrapername.nil?
        dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
      end
      path = "%s/%s" % [username, dirscrapername]

      record = {"query"=>sqlquery, "params"=>data, "attach"=>[]}
      $attachlist.each do |value|
        record["attach"].push({"user"=>username, "database"=>value["name"], "alias"=>value["asname"], "securityhash"=>"somthing"})
      end

      httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
      headers = { "Accept"=>"application/json+tuples", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
      response = httpcall.put(path, JSON.generate(record), headers)
      res = JSON.parse(response.body)
      if res["state"] == "error"
        ScraperWiki.raisesqliteerror(res["message"])
      end
      if (res.class == Hash) and (res["keys"].class == Array) and (res["data"].class == Array)
        if res["keys"].include?("state") and (res["data"].length == 1)
          ddata = Hash[*res["keys"].zip(res["data"][0]).flatten]
          if ddata["state"] == "error"
            ScraperWiki.raisesqliteerror(ddata["message"])
          end
        end
      end
    end

    if verbose
      if data.kind_of?(Array)
        ldata = [ ]
        data.each do |value|
          if value == nil
            value = ''
          end
          ldata.push(ScraperWiki._unicode_truncate(value.to_s, 50))
        end
      else
        ldata = data
      end
      ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"execute", 'val1'=>sqlquery, 'val2'=>ldata})
    end
    return res
  end

  # this ought to be a local function
  def ScraperWiki._convdata(unique_keys, scraper_data)
    if unique_keys
      for key in unique_keys
        if !key.kind_of?(String) and !key.kind_of?(Symbol)
          return { "error" => 'unique_keys must each be a string or a symbol', "bad_key" => key }
        end
        if !scraper_data.include?(key) and !scraper_data.include?(key.to_sym)
          return { "error" => 'unique_keys must be a subset of data', "bad_key" => key }
        end
        if scraper_data[key] == nil and scraper_data[key.to_sym] == nil
          return { "error" => 'unique_key value should not be nil', "bad_key" => key }
        end
      end
    end

    jdata = { }
    scraper_data.each_pair do |key, value|
      if not key
        return { "error" => 'key must not be blank', "bad_key" => key }
      end
      if key.kind_of?(Symbol)
        key = key.to_s
      end
      if key.class != String
        return { "error" => 'key must be string type', "bad_key" => key }
      end

      if !/[a-zA-Z0-9_\- ]+$/.match(key)
        return { "error" => 'key must be simple text', "bad_key" => key }
      end

      if value.kind_of?(Date)
        value = value.iso8601
      end
      if value.kind_of?(Time)
        value = value.iso8601
        raise "internal error, timezone came out as non-UTC while converting to SQLite format" unless value.match(/\+00:00$/)
        value.gsub!(/\+00:00$/, '')
      end
      if ![Fixnum, Float, String, TrueClass, FalseClass, NilClass].include?(value.class)
        value = value.to_s
      end
      jdata[key] = value
    end
    return jdata
  end

  # Save a hash (or an array of hashes) into table_name, upserting on unique_keys.
  def ScraperWiki.save_sqlite(unique_keys, data, table_name="swdata", verbose=2)
    if !data
      ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => "EMPTY SAVE IGNORED"})
      return
    end

    # convert :symbols to "strings"
    unique_keys = unique_keys.map { |x| x.kind_of?(Symbol) ? x.to_s : x }

    if data.class == Hash
      data = [ data ]
    end

    rjdata = [ ]
    for ldata in data
      ljdata = _convdata(unique_keys, ldata)
      if ljdata.include?("error")
        raise SqliteException.new(ljdata["error"])
      end
      rjdata.push(ljdata)
    end

    ds = SW_DataStore.create()
    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'save_sqlite', 'unique_keys'=>unique_keys, 'data'=>rjdata, 'swdatatblname'=>table_name})
    else
      username = 'resourcedir'   # gets it into the right subdirectory automatically!!!
      dirscrapername = ds.m_scrapername
      if ds.m_scrapername == '' or ds.m_scrapername.nil?
        dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
      end

      # (do something about jargtypes later)
      qsl = [ ]
      unique_keys.each do |key|
        qsl.push("unique=" + URI.encode(key))
      end

      # quick and dirty provision of column types to the webstore
      if rjdata.length != 0
        jargtypes = { }
        rjdata[0].each_pair do |k, v|
          if v != nil
            #if k[-5..-1] == "_blob"
            #  vt = "blob"  # coerced into affinity none
            if v.class == Fixnum
              vt = "integer"
            elsif v.class == Float
              vt = "real"
            else
              vt = "text"
            end
            jargtypes[k] = vt
          end
        end
        qsl.push(("jargtypes=" + JSON.generate(jargtypes)))
      end

      path = "%s/%s/%s?%s" % [username, dirscrapername, table_name, qsl.join("&")]
      #puts JSON.generate(rjdata)
      httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
      headers = { "Accept"=>"application/json", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
      response = httpcall.post(path, JSON.generate(rjdata), headers)
      #puts response.body
      res = JSON.parse(response.body)
      if res["state"] == "error"
        res["error"] = res["message"]
      end
    end

    if res["error"]
      raise SqliteException.new(res["error"])
    end

    if verbose >= 2
      pdata = { }
      if rjdata.class == Hash
        sdata = rjdata
      else
        sdata = rjdata[0]
      end
      sdata.each_pair do |key, value|
        key = ScraperWiki._unicode_truncate(key.to_s, 50)
        if value == nil
          value = ''
        else
          value = ScraperWiki._unicode_truncate(value.to_s, 50)
        end
        pdata[key] = String(value)
      end
      if rjdata.class == Array and rjdata.size > 1
        pdata["number_records"] = "Number Records: " + String(rjdata.size)
      end
      ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => pdata})
    end
    return res
  end

  # also needs to handle the types better (could save json and datetime objects handily)
  def ScraperWiki.save_var(name, value, verbose=2)
    vtype = String(value.class)
    svalue = value.to_s
    if vtype != "Fixnum" and vtype != "String" and vtype != "Float" and vtype != "NilClass"
      puts "*** object of type " + vtype + " converted to string\n"
    end
    data = { "name" => name, "value_blob" => svalue, "type" => vtype }
    ScraperWiki.save_sqlite(["name"], data, "swvariables", verbose)
  end

  def ScraperWiki.get_var(name, default=nil, verbose=2)
    begin
      result = ScraperWiki.sqliteexecute("select value_blob, type from swvariables where name=?", [name], verbose)
    rescue NoSuchTableSqliteException => e
      return default
    end

    if !result.has_key?("data")
      return default
    end

    if result["data"].length == 0
      return default
    end
    # consider casting to type
    svalue = result["data"][0][0]
    vtype = result["data"][0][1]
    if vtype == "Fixnum"
      return svalue.to_i
    end
    if vtype == "Float"
      return svalue.to_f
    end
    if vtype == "NilClass"
      return nil
    end
    return svalue
  end

  # These are DEPRECATED and just here for compatibility
  def ScraperWiki.get_metadata(metadata_name, default = nil)
    if !$metadatamessagedone
      puts "*** instead of get_metadata('" + metadata_name + "') please use\n  get_var('" + metadata_name + "')"
      $metadatamessagedone = true
    end
    result = ScraperWiki.get_var(metadata_name, default)
    return result
  end

  # These are DEPRECATED and just here for compatibility
  def ScraperWiki.save_metadata(metadata_name, value)
    if !$metadatamessagedone
      puts "*** instead of save_metadata('" + metadata_name + "') please use\n  save_var('" + metadata_name + "')"
      $metadatamessagedone = true
    end
    return ScraperWiki.save_var(metadata_name, value)
  end

  def ScraperWiki.show_tables(dbname=nil)
    name = "sqlite_master"
    if dbname != nil
      name = "`" + dbname + "`.sqlite_master"
    end
    result = ScraperWiki.sqliteexecute("select tbl_name, sql from " + name + " where type='table'")
    #return result["data"]
    return (Hash[*result["data"].flatten])  # pre-1.8.7
  end

  def ScraperWiki.table_info(name)
    sname = name.split(".")
    if sname.length == 2
      result = ScraperWiki.sqliteexecute("PRAGMA %s.table_info(`%s`)" % sname)
    else
      result = ScraperWiki.sqliteexecute("PRAGMA table_info(`%s`)" % name)
    end
    res = [ ]
    for d in result["data"]
      res.push(Hash[*result["keys"].zip(d).flatten])  # pre-1.8.7
    end
    return res
  end

  def ScraperWiki.getDataByDate(name, start_date, end_date, limit=-1, offset=0)
    raise SqliteException.new("getDataByDate has been deprecated")
  end

  def ScraperWiki.getDataByLocation(name, lat, lng, limit=-1, offset=0)
    raise SqliteException.new("getDataByLocation has been deprecated")
  end

  def ScraperWiki.search(name, filterdict, limit=-1, offset=0)
    raise SqliteException.new("SW_APIWrapper.search has been deprecated")
  end

  def ScraperWiki.raisesqliteerror(rerror)
    if /sqlite3.Error: no such table:/.match(rerror)  # old dataproxy
      raise NoSuchTableSqliteException.new(rerror)
    end
    if /DB Error: \(OperationalError\) no such table:/.match(rerror)
      raise NoSuchTableSqliteException.new(rerror)
    end
    raise SqliteException.new(rerror)
  end

  def ScraperWiki.attach(name, asname=nil, verbose=1)
    $attachlist.push({"name"=>name, "asname"=>asname})

    ds = SW_DataStore.create()

    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"attach", 'name'=>name, 'asname'=>asname})
      if res["error"]
        ScraperWiki.raisesqliteerror(res)
      end
    else
      res = {'status'=>'ok'}
    end

    if verbose
      ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"attach", 'val1'=>name, 'val2'=>asname})
    end

    return res
  end

  def ScraperWiki.commit(verbose=1)
    ds = SW_DataStore.create()
    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"commit"})
    else
      puts "*** commit() no longer a necessary function call"
      res = {'status'=>'ok'}
    end
  end

  # Convenience wrapper: prepends "select " and returns rows as an array of hashes.
  def ScraperWiki.select(sqlquery, data=nil, verbose=1)
    if data != nil && sqlquery.scan(/\?/).length != 0 && data.class != Array
      data = [data]
    end
    result = ScraperWiki.sqliteexecute("select " + sqlquery, data, verbose)
    res = [ ]
    for d in result["data"]
      #res.push(Hash[result["keys"].zip(d)])  # post-1.8.7
      res.push(Hash[*result["keys"].zip(d).flatten])  # pre-1.8.7
    end
    return res
  end

  # old functions put back in for regression
  def ScraperWiki.getData(name, limit=-1, offset=0)
    if !$apiwrapperattacheddata.include?(name)
      puts "*** instead of getData('" + name + "') please use\n  ScraperWiki.attach('" + name + "')\n  print ScraperWiki.select('* from `" + name + "`.swdata')"
      ScraperWiki.attach(name)
      $apiwrapperattacheddata.push(name)
    end

    apilimit = 500
    g = Enumerator.new do |g|
      count = 0
      while true
        if limit == -1
          step = apilimit
        else
          step = apilimit < (limit - count) ? apilimit : limit - count
        end
        query = "* from `#{name}`.swdata limit #{step} offset #{offset+count}"

        records = ScraperWiki.select(query)
        for r in records
          g.yield r
        end

        count += records.length
        if records.length < step
          break
        end
        if limit != -1 and count >= limit
          break
        end
      end
    end
  end

  def ScraperWiki.getKeys(name)
    if !$apiwrapperattacheddata.include?(name)
      puts "*** instead of getKeys('" + name + "') please use\n  ScraperWiki.attach('" + name + "')\n  print ScraperWiki.sqliteexecute('select * from `" + name + "`.swdata limit 0')['keys']"
      ScraperWiki.attach(name)
      $apiwrapperattacheddata.push(name)
    end
    result = ScraperWiki.sqliteexecute("select * from `" + name + "`.swdata limit 0")
    if result.include?("error")
      raise SqliteException.new(result["error"])
    end
    return result["keys"]
  end
end
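Taken together, scraperwiki.rb is the public API a scraper calls: scrape fetches pages, save/save_sqlite upsert rows, save_var/get_var persist single values, and select/attach query this and other scrapers' SQLite data. The snippet below is only an illustrative sketch of that calling pattern; it assumes the ScraperWiki runner has already initialised SW_DataStore and $logfd (so it will not run standalone), and the URL and record contents are made up.

  require 'scraperwiki'

  html = ScraperWiki.scrape("http://example.com/prices")      # SSL verification is disabled by scrape

  record = { "id" => 1, "name" => "widget", "price" => 9.99 }  # hypothetical parsed result
  ScraperWiki.save(["id"], record)                             # upsert into the default swdata table

  ScraperWiki.save_var("last_run", Time.now.to_s)              # persist state between runs
  puts ScraperWiki.get_var("last_run")

  ScraperWiki.select("* from swdata where price < ?", [10.0]).each do |row|
    puts row["name"]
  end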
data/lib/scraperwiki/datastore.rb
ADDED
@@ -0,0 +1,109 @@

require 'json'
require 'singleton'
require 'socket'
require 'thread'
require 'cgi'

# the python version of this makes use of a global static copy of the class
# so the connection is made only once to the dataproxy
# I think the Singleton module implements this magically

class SW_DataStore

  @@lock = Mutex.new

  include Singleton

  attr_accessor :m_port, :m_host, :m_scrapername, :m_runid, :m_attachables, :m_webstore_port

  def initialize
    @m_socket = nil
    @m_host = nil
    @m_port = nil
    @m_scrapername = ''
    @m_runid = ''
    @m_attachables = []
    @m_webstore_port = 0
  end

  def ensure_connected
    # Connect to the data proxy. The data proxy will need to make an Ident call
    # back to get the scraperID. Since the data proxy may be on another machine
    # and the peer address it sees will have been subject to NAT or masquerading,
    # send the UML name and the socket port number in the request.

    if @m_socket == nil
      @m_socket = TCPSocket.open(@m_host, @m_port)
      proto, port, name, ip = @m_socket.addr()
      if @m_scrapername == '' or @m_scrapername.nil?
        sname = ''
      else
        sname = CGI::escape(@m_scrapername)
      end
      if @m_runid == '' or @m_runid.nil?
        rid = ''
      else
        rid = CGI::escape(@m_runid)
      end

      getmsg = "GET /?uml=%s&port=%s&vscrapername=%s&vrunid=%s HTTP/1.1\n\n" % ['lxc', port, sname, rid]
      @m_socket.send(getmsg, 0)
      @m_socket.flush()

      buffer = @m_socket.recv(1024)
      result = JSON.parse(buffer)
      if result["status"] != "good"
        raise result["status"]
      end
    end
  end

  def request(req)
    text = ''
    @@lock.synchronize {
      ensure_connected
      reqmsg = JSON.generate(req) + "\n"

      bytes_sent = 0
      while bytes_sent < reqmsg.length
        bytes_sent += @m_socket.send(reqmsg.slice(bytes_sent, reqmsg.length), 0)
      end
      @m_socket.flush()

      while true
        buffer = @m_socket.recv(1024)
        if buffer.length == 0
          break
        end
        text += buffer
        if text[-1] == "\n"[0]
          break
        end
      end
    }
    return JSON.parse(text)
  end

  # function used to both initialize the settings and get an instance!
  # this is ridiculous and unnecessary with the new webstore.
  # we are creating the object without the fields merely to access the static variables!
  def SW_DataStore.create(host=nil, port=nil, scrapername='', runid=nil, attachables=nil, webstore_port=nil)
    instance = SW_DataStore.instance
    # so, it might be intended that the host and port are
    # set once, never to be changed, but this is ruby so
    # there's no way to guarantee that.
    if host && port && instance.m_port.nil? && instance.m_host.nil?
      instance.m_host = host
      instance.m_port = port
      instance.m_scrapername = scrapername
      instance.m_runid = runid
      instance.m_attachables = attachables
      instance.m_webstore_port = webstore_port
    elsif host && port
      raise "Can't change host and port once connection made"
    elsif !(instance.m_port) || !(instance.m_host)
      raise "Can't return a datastore without port/host information"
    end
    instance
  end
end
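SW_DataStore is a process-wide singleton: the first call to SW_DataStore.create with a host and port fixes the connection settings, and every later call (as made throughout scraperwiki.rb) returns that same instance. A minimal sketch of that lifecycle, with illustrative host, port, and runid values standing in for whatever the ScraperWiki runner actually supplies:

  require 'scraperwiki/datastore'

  # first call records the settings (values here are made up)
  ds = SW_DataStore.create("127.0.0.1", 9003, "my_scraper", "run-0001", [], 0)

  # later calls from library code reuse the same instance
  same = SW_DataStore.create()
  puts same.equal?(ds)        # => true
  puts same.m_webstore_port   # => 0, so requests go over the dataproxy socket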
data/lib/scraperwiki/stacktrace.rb
ADDED
@@ -0,0 +1,51 @@

def _get_stackentry(code_filename, code, filename, linenumber, funcname)
  nlinenumber = linenumber.to_i
  stackentry = {"file" => filename, "linenumber" => nlinenumber, "duplicates" => 1}

  if filename == "(eval)" or filename == code_filename
    codelines = code.split("\n")
    if (nlinenumber >= 1) && (nlinenumber <= codelines.size)
      stackentry["linetext"] = codelines[nlinenumber-1]
    elsif (nlinenumber == codelines.size + 1)
      stackentry["linetext"] = "<end of file>"
    else
      stackentry["linetext"] = "getExceptionTraceback: ScraperWiki internal error, line %d out of range in file %s" % [nlinenumber, code_filename]
    end
    stackentry["file"] = "<string>"
  else
    # XXX bit of a hack to show the line number in third party libraries
    stackentry["file"] += ":" + linenumber
  end
  if funcname
    stackentry["furtherlinetext"] = funcname
  end
  return stackentry
end

def getExceptionTraceback(e, code, code_filename)
  lbacktrace = e.backtrace.reverse
  #File.open("/tmp/fairuby", 'a') {|f| f.write(JSON.generate(lbacktrace)) }

  exceptiondescription = e.to_s

  stackdump = []
  for l in lbacktrace
    (filename, linenumber, funcname) = l.split(":")

    next if filename.match(/\/exec.rb$/)  # skip showing stack of wrapper

    stackentry = _get_stackentry(code_filename, code, filename, linenumber, funcname)
    stackdump.push(stackentry)
  end

  if e.kind_of?(SyntaxError)
    (filename, linenumber, message) = exceptiondescription.split(/[:\n]/, 3)
    exceptiondescription = message

    stackentry = _get_stackentry(code_filename, code, filename, linenumber, nil)
    stackdump.push(stackentry)
  end

  return { 'message_type' => 'exception', 'exceptiondescription' => exceptiondescription, "stackdump" => stackdump }
end
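stacktrace.rb converts a rescued exception into the JSON-style 'exception' message the platform console expects, mapping backtrace entries back to the scraper's own source lines where possible. A rough usage sketch (the code string and the "(eval)" filename are illustrative; in production the platform's exec wrapper supplies them):

  require 'json'
  require 'scraperwiki/stacktrace'

  code = "raise 'boom'"
  begin
    eval(code, binding, "(eval)")
  rescue Exception => e
    puts JSON.generate(getExceptionTraceback(e, code, "(eval)"))
  end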
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,98 @@
--- !ruby/object:Gem::Specification
name: scraperwiki
version: !ruby/object:Gem::Version
  hash: 21
  prerelease:
  segments:
  - 1
  - 0
  - 1
  version: 1.0.1
platform: ruby
authors:
- Francis Irving
autorequire:
bindir: bin
cert_chain: []

date: 2011-10-09 00:00:00 +02:00
default_executable:
dependencies:
- !ruby/object:Gem::Dependency
  name: json
  prerelease: false
  requirement: &id001 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        hash: 3
        segments:
        - 0
        version: "0"
  type: :runtime
  version_requirements: *id001
- !ruby/object:Gem::Dependency
  name: httpclient
  prerelease: false
  requirement: &id002 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        hash: 3
        segments:
        - 0
        version: "0"
  type: :runtime
  version_requirements: *id002
description: Ruby code used for accessing
email:
- francis@scraperwiki.com
executables: []

extensions: []

extra_rdoc_files: []

files:
- lib/version.rb
- lib/scraperwiki.rb
- lib/scraperwiki/datastore.rb
- lib/scraperwiki/stacktrace.rb
has_rdoc: true
homepage: http://scraperwiki.com
licenses: []

post_install_message:
rdoc_options: []

require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      hash: 3
      segments:
      - 0
      version: "0"
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      hash: 3
      segments:
      - 0
      version: "0"
requirements: []

rubyforge_project:
rubygems_version: 1.5.2
signing_key:
specification_version: 3
summary: ScraperWiki client library for Ruby
test_files: []
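The gemspec pins no versions: json and httpclient are open ">= 0" runtime dependencies, and only the four files under lib/ are shipped. To inspect this exact release locally, something like the following should work, noting that most of the API still expects the ScraperWiki platform's dataproxy or webstore to be reachable:

  gem install scraperwiki -v 1.0.1
  ruby -e "require 'scraperwiki'; puts ScraperWiki.respond_to?(:scrape)"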