scraperwiki 1.0.1
- data/lib/scraperwiki.rb +480 -0
- data/lib/scraperwiki/datastore.rb +109 -0
- data/lib/scraperwiki/stacktrace.rb +51 -0
- data/lib/version.rb +4 -0
- metadata +98 -0
data/lib/scraperwiki.rb
ADDED
@@ -0,0 +1,480 @@
require 'json'
require 'uri'
require 'net/http'
require 'date'
require 'scraperwiki/datastore'
require 'httpclient'

class SqliteException < RuntimeError
end

class NoSuchTableSqliteException < SqliteException
end

$apiwrapperattacheddata = [ ]

module ScraperWiki

  $metadatamessagedone = false
  $attachlist = [ ]

  # Writes a length-prefixed JSON message to $logfd, the log stream that the
  # ScraperWiki runner is expected to have opened before the scraper starts.
  def ScraperWiki.dumpMessage(hash)
    msg = JSON.generate(hash)
    $logfd.write("JSONRECORD(" + msg.length.to_s() + "):" + msg + "\n")
    $logfd.flush()
  end

  def ScraperWiki.httpresponseheader(headerkey, headervalue)
    ScraperWiki.dumpMessage({'message_type' => 'httpresponseheader', 'headerkey' => headerkey, 'headervalue' => headervalue})
  end

  # Fetches a URL: a plain GET when params is nil, otherwise a POST of params.
  def ScraperWiki.scrape(url, params = nil)
    client = HTTPClient.new
    client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE

    if params.nil?
      return client.get_content(url)
    else
      return client.post_content(url, params)
    end
  end

  def ScraperWiki.gb_postcode_to_latlng(postcode)
    uri = URI.parse("http://views.scraperwiki.com/run/uk_postcode_lookup/?postcode=" + URI.escape(postcode))
    sres = Net::HTTP.get(uri)
    jres = JSON.parse(sres)
    if jres["lat"] and jres["lng"]
      return [jres["lat"], jres["lng"]]
    end
    return nil
  end


  def ScraperWiki._unicode_truncate(string, size)
    # Stops 2-byte unicode characters from being chopped in half, which kills the JSON serializer
    string.scan(/./u)[0, size].join
  end

  # Saves a record into the datastore, optionally tagging it with a date and a lat/lng pair.
  def ScraperWiki.save(unique_keys, data, date=nil, latlng=nil, table_name="swdata")
    if unique_keys != nil && !unique_keys.kind_of?(Array)
      raise 'unique_keys must be nil or an array'
    end
    if data == nil
      raise 'data must have a non-nil value'
    end

    ds = SW_DataStore.create()
    ldata = data.dup
    if date != nil
      ldata["date"] = date
    end
    if latlng != nil
      ldata["latlng_lat"] = latlng[0]
      ldata["latlng_lng"] = latlng[1]
    end
    return ScraperWiki.save_sqlite(unique_keys, ldata, table_name, 2)
  end

  def ScraperWiki.sqliteexecute(sqlquery, data=nil, verbose=2)
    ds = SW_DataStore.create()
    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'sqliteexecute', 'sqlquery'=>sqlquery, 'data'=>data, 'attachlist'=>$attachlist})
    else
      username = 'resourcedir'   # gets it into the right subdirectory automatically!!!
      dirscrapername = ds.m_scrapername
      if ds.m_scrapername == '' or ds.m_scrapername.nil?
        dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
      end
      path = "%s/%s" % [username, dirscrapername]

      record = {"query"=>sqlquery, "params"=>data, "attach"=>[]}
      $attachlist.each do |value|
        record["attach"].push({"user"=>username, "database"=>value["name"], "alias"=>value["asname"], "securityhash"=>"somthing"})
      end

      httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
      headers = { "Accept"=>"application/json+tuples", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
      response = httpcall.put(path, JSON.generate(record), headers)
      res = JSON.parse(response.body)
      if res["state"] == "error"
        ScraperWiki.raisesqliteerror(res["message"])
      end
      if (res.class == Hash) and (res["keys"].class == Array) and (res["data"].class == Array)
        if res["keys"].include?("state") and (res["data"].length == 1)
          ddata = Hash[*res["keys"].zip(res["data"][0]).flatten]
          if ddata["state"] == "error"
            ScraperWiki.raisesqliteerror(ddata["message"])
          end
        end
      end
    end

    if verbose
      if data.kind_of?(Array)
        ldata = [ ]
        data.each do |value|
          if value == nil
            value = ''
          end
          ldata.push(ScraperWiki._unicode_truncate(value.to_s, 50))
        end
      else
        ldata = data
      end
      ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"execute", 'val1'=>sqlquery, 'val2'=>ldata})
    end
    return res
  end


  # this ought to be a local function
  def ScraperWiki._convdata(unique_keys, scraper_data)
    if unique_keys
      for key in unique_keys
        if !key.kind_of?(String) and !key.kind_of?(Symbol)
          return { "error" => 'unique_keys must each be a string or a symbol', "bad_key" => key }
        end
        if !scraper_data.include?(key) and !scraper_data.include?(key.to_sym)
          return { "error" => 'unique_keys must be a subset of data', "bad_key" => key }
        end
        if scraper_data[key] == nil and scraper_data[key.to_sym] == nil
          return { "error" => 'unique_key value should not be nil', "bad_key" => key }
        end
      end
    end

    jdata = { }
    scraper_data.each_pair do |key, value|
      if not key
        return { "error" => 'key must not be blank', "bad_key" => key }
      end
      if key.kind_of?(Symbol)
        key = key.to_s
      end
      if key.class != String
        return { "error" => 'key must be string type', "bad_key" => key }
      end

      if !/[a-zA-Z0-9_\- ]+$/.match(key)
        return { "error" => 'key must be simple text', "bad_key" => key }
      end

      if value.kind_of?(Date)
        value = value.iso8601
      end
      if value.kind_of?(Time)
        value = value.iso8601
        raise "internal error, timezone came out as non-UTC while converting to SQLite format" unless value.match(/\+00:00$/)
        value.gsub!(/\+00:00$/, '')
      end
      if ![Fixnum, Float, String, TrueClass, FalseClass, NilClass].include?(value.class)
        value = value.to_s
      end
      jdata[key] = value
    end
    return jdata
  end


  def ScraperWiki.save_sqlite(unique_keys, data, table_name="swdata", verbose=2)
    if !data
      ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => "EMPTY SAVE IGNORED"})
      return
    end

    # convert :symbols to "strings" (unique_keys may legitimately be nil)
    unique_keys = (unique_keys || []).map { |x| x.kind_of?(Symbol) ? x.to_s : x }

    if data.class == Hash
      data = [ data ]
    end

    rjdata = [ ]
    for ldata in data
      ljdata = _convdata(unique_keys, ldata)
      if ljdata.include?("error")
        raise SqliteException.new(ljdata["error"])
      end
      rjdata.push(ljdata)
    end

    ds = SW_DataStore.create()
    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'save_sqlite', 'unique_keys'=>unique_keys, 'data'=>rjdata, 'swdatatblname'=>table_name})
    else
      username = 'resourcedir'   # gets it into the right subdirectory automatically!!!
      dirscrapername = ds.m_scrapername
      if ds.m_scrapername == '' or ds.m_scrapername.nil?
        dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
      end

      # (do something about jargtypes later)
      qsl = [ ]
      unique_keys.each do |key|
        qsl.push("unique=" + URI.encode(key))
      end

      # quick and dirty provision of column types to the webstore
      if rjdata.length != 0
        jargtypes = { }
        rjdata[0].each_pair do |k, v|
          if v != nil
            #if k[-5..-1] == "_blob"
            #  vt = "blob"   # coerced into affinity none
            if v.class == Fixnum
              vt = "integer"
            elsif v.class == Float
              vt = "real"
            else
              vt = "text"
            end
            jargtypes[k] = vt
          end
        end
        qsl.push(("jargtypes=" + JSON.generate(jargtypes)))
      end

      path = "%s/%s/%s?%s" % [username, dirscrapername, table_name, qsl.join("&")]
      #puts JSON.generate(rjdata)
      httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
      headers = { "Accept"=>"application/json", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
      response = httpcall.post(path, JSON.generate(rjdata), headers)
      #puts response.body
      res = JSON.parse(response.body)
      if res["state"] == "error"
        res["error"] = res["message"]
      end
    end

    if res["error"]
      raise SqliteException.new(res["error"])
    end

    if verbose >= 2
      pdata = { }
      if rjdata.class == Hash
        sdata = rjdata
      else
        sdata = rjdata[0]
      end
      sdata.each_pair do |key, value|
        key = ScraperWiki._unicode_truncate(key.to_s, 50)
        if value == nil
          value = ''
        else
          value = ScraperWiki._unicode_truncate(value.to_s, 50)
        end
        pdata[key] = String(value)
      end
      if rjdata.class == Array and rjdata.size > 1
        pdata["number_records"] = "Number Records: " + String(rjdata.size)
      end
      ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => pdata})
    end
    return res
  end

  # also needs to handle the types better (could save json and datetime objects handily)
  def ScraperWiki.save_var(name, value, verbose=2)
    vtype = String(value.class)
    svalue = value.to_s
    if vtype != "Fixnum" and vtype != "String" and vtype != "Float" and vtype != "NilClass"
      puts "*** object of type " + vtype + " converted to string\n"
    end
    data = { "name" => name, "value_blob" => svalue, "type" => vtype }
    ScraperWiki.save_sqlite(["name"], data, "swvariables", verbose)
  end

  def ScraperWiki.get_var(name, default=nil, verbose=2)
    begin
      result = ScraperWiki.sqliteexecute("select value_blob, type from swvariables where name=?", [name], verbose)
    rescue NoSuchTableSqliteException => e
      return default
    end

    if !result.has_key?("data")
      return default
    end

    if result["data"].length == 0
      return default
    end
    # consider casting to type
    svalue = result["data"][0][0]
    vtype = result["data"][0][1]
    if vtype == "Fixnum"
      return svalue.to_i
    end
    if vtype == "Float"
      return svalue.to_f
    end
    if vtype == "NilClass"
      return nil
    end
    return svalue
  end

  # These are DEPRECATED and just here for compatibility
  def ScraperWiki.get_metadata(metadata_name, default = nil)
    if !$metadatamessagedone
      puts "*** instead of get_metadata('" + metadata_name + "') please use\n  get_var('" + metadata_name + "')"
      $metadatamessagedone = true
    end
    result = ScraperWiki.get_var(metadata_name, default)
    return result
  end

  # These are DEPRECATED and just here for compatibility
  def ScraperWiki.save_metadata(metadata_name, value)
    if !$metadatamessagedone
      puts "*** instead of save_metadata('" + metadata_name + "') please use\n  save_var('" + metadata_name + "')"
      $metadatamessagedone = true
    end
    return ScraperWiki.save_var(metadata_name, value)
  end


  def ScraperWiki.show_tables(dbname=nil)
    name = "sqlite_master"
    if dbname != nil
      name = "`" + dbname + "`.sqlite_master"
    end
    result = ScraperWiki.sqliteexecute("select tbl_name, sql from " + name + " where type='table'")
    #return result["data"]
    return (Hash[*result["data"].flatten])   # pre-1.8.7
  end


  def ScraperWiki.table_info(name)
    sname = name.split(".")
    if sname.length == 2
      result = ScraperWiki.sqliteexecute("PRAGMA %s.table_info(`%s`)" % sname)
    else
      result = ScraperWiki.sqliteexecute("PRAGMA table_info(`%s`)" % name)
    end
    res = [ ]
    for d in result["data"]
      res.push(Hash[*result["keys"].zip(d).flatten])   # pre-1.8.7
    end
    return res
  end


  def ScraperWiki.getDataByDate(name, start_date, end_date, limit=-1, offset=0)
    raise SqliteException.new("getDataByDate has been deprecated")
  end

  def ScraperWiki.getDataByLocation(name, lat, lng, limit=-1, offset=0)
    raise SqliteException.new("getDataByLocation has been deprecated")
  end

  def ScraperWiki.search(name, filterdict, limit=-1, offset=0)
    raise SqliteException.new("SW_APIWrapper.search has been deprecated")
  end

  def ScraperWiki.raisesqliteerror(rerror)
    if /sqlite3.Error: no such table:/.match(rerror)   # old dataproxy
      raise NoSuchTableSqliteException.new(rerror)
    end
    if /DB Error: \(OperationalError\) no such table:/.match(rerror)
      raise NoSuchTableSqliteException.new(rerror)
    end
    raise SqliteException.new(rerror)
  end

  def ScraperWiki.attach(name, asname=nil, verbose=1)
    $attachlist.push({"name"=>name, "asname"=>asname})

    ds = SW_DataStore.create()

    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"attach", 'name'=>name, 'asname'=>asname})
      if res["error"]
        ScraperWiki.raisesqliteerror(res["error"])
      end
    else
      res = {'status'=>'ok'}
    end

    if verbose
      ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"attach", 'val1'=>name, 'val2'=>asname})
    end

    return res
  end


  def ScraperWiki.commit(verbose=1)
    ds = SW_DataStore.create()
    if ds.m_webstore_port == 0
      res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"commit"})
    else
      puts "*** commit() no longer a necessary function call"
      res = {'status'=>'ok'}
    end
  end

  def ScraperWiki.select(sqlquery, data=nil, verbose=1)
    if data != nil && sqlquery.scan(/\?/).length != 0 && data.class != Array
      data = [data]
    end
    result = ScraperWiki.sqliteexecute("select " + sqlquery, data, verbose)
    res = [ ]
    for d in result["data"]
      #res.push(Hash[result["keys"].zip(d)])   # post-1.8.7
      res.push(Hash[*result["keys"].zip(d).flatten])   # pre-1.8.7
    end
    return res
  end

  # old functions put back in for regression
  def ScraperWiki.getData(name, limit=-1, offset=0)
    if !$apiwrapperattacheddata.include?(name)
      puts "*** instead of getData('" + name + "') please use\n  ScraperWiki.attach('" + name + "') \n  print ScraperWiki.select('* from `" + name + "`.swdata')"
      ScraperWiki.attach(name)
      $apiwrapperattacheddata.push(name)
    end

    apilimit = 500
    g = Enumerator.new do |g|
      count = 0
      while true
        if limit == -1
          step = apilimit
        else
          step = apilimit < (limit - count) ? apilimit : limit - count
        end
        query = "* from `#{name}`.swdata limit #{step} offset #{offset+count}"

        records = ScraperWiki.select(query)
        for r in records
          g.yield r
        end

        count += records.length
        if records.length < step
          break
        end
        if limit != -1 and count >= limit
          break
        end
      end
    end
  end

  def ScraperWiki.getKeys(name)
    if !$apiwrapperattacheddata.include?(name)
      puts "*** instead of getKeys('" + name + "') please use\n  ScraperWiki.attach('" + name + "') \n  print ScraperWiki.sqliteexecute('select * from `" + name + "`.swdata limit 0')['keys']"
      ScraperWiki.attach(name)
      $apiwrapperattacheddata.push(name)
    end
    result = ScraperWiki.sqliteexecute("select * from `" + name + "`.swdata limit 0")
    if result.include?("error")
      raise SqliteException.new(result["error"])
    end
    return result["keys"]
  end
end
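Taken together, the module above is the public face of the gem. As a rough usage sketch only (the URL, table values and variable names are invented, and the calls assume the scraper is running inside the ScraperWiki environment, which configures SW_DataStore and the $logfd log handle before user code runs):

    require 'scraperwiki'

    # GET a page; ScraperWiki.scrape would POST instead if a params hash were passed.
    html = ScraperWiki.scrape("http://example.com/")   # hypothetical URL

    # Save one record into the default "swdata" table, keyed on "id".
    ScraperWiki.save(["id"], { "id" => 1, "length" => html.length })

    # Persist a small value between runs and read it back (with a default if missing).
    ScraperWiki.save_var("last_length", html.length)
    puts ScraperWiki.get_var("last_length", 0)

    # Query rows back out; select() prefixes the query string with "select ".
    ScraperWiki.select("* from swdata where id = ?", [1]).each do |row|
      puts row.inspect
    end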
data/lib/scraperwiki/datastore.rb
ADDED
@@ -0,0 +1,109 @@
require 'json'
require 'singleton'
require 'thread'
require 'socket'
require 'cgi'

# the python version of this makes use of a global static copy of the class
# so the connection is made only once to the dataproxy
# I think the Singleton module implements this magically

class SW_DataStore

  @@lock = Mutex.new

  include Singleton

  attr_accessor :m_port, :m_host, :m_scrapername, :m_runid, :m_attachables, :m_webstore_port

  def initialize
    @m_socket = nil
    @m_host = nil
    @m_port = nil
    @m_scrapername = ''
    @m_runid = ''
    @m_attachables = []
    @m_webstore_port = 0
  end


  def ensure_connected
    # Connect to the data proxy. The data proxy will need to make an Ident call
    # back to get the scraperID. Since the data proxy may be on another machine
    # and the peer address it sees will have been subject to NAT or masquerading,
    # send the UML name and the socket port number in the request.

    if @m_socket == nil
      @m_socket = TCPSocket.open(@m_host, @m_port)
      proto, port, name, ip = @m_socket.addr()
      if @m_scrapername == '' or @m_scrapername.nil?
        sname = ''
      else
        sname = CGI::escape(@m_scrapername)
      end
      if @m_runid == '' or @m_runid.nil?
        rid = ''
      else
        rid = CGI::escape(@m_runid)
      end

      getmsg = "GET /?uml=%s&port=%s&vscrapername=%s&vrunid=%s HTTP/1.1\n\n" % ['lxc', port, sname, rid]
      @m_socket.send(getmsg, 0)
      @m_socket.flush()

      buffer = @m_socket.recv(1024)
      result = JSON.parse(buffer)
      if result["status"] != "good"
        raise result["status"]
      end
    end
  end

  def request(req)
    text = ''
    @@lock.synchronize {
      ensure_connected
      reqmsg = JSON.generate(req) + "\n"

      bytes_sent = 0
      while bytes_sent < reqmsg.length
        bytes_sent += @m_socket.send(reqmsg.slice(bytes_sent, reqmsg.length), 0)
      end
      @m_socket.flush()

      while true
        buffer = @m_socket.recv(1024)
        if buffer.length == 0
          break
        end
        text += buffer
        if text[-1] == "\n"[0]
          break
        end
      end
    }
    return JSON.parse(text)
  end

  # function used to both initialize the settings and get an instance!
  # this is ridiculous and unnecessary with new webstore.
  # we are creating object without the fields merely to access the static variables!
  def SW_DataStore.create(host=nil, port=nil, scrapername='', runid=nil, attachables=nil, webstore_port=nil)
    instance = SW_DataStore.instance
    # so, it might be intended that the host and port are
    # set once, never to be changed, but this is ruby so
    # there's no way to guarantee that.
    if host && port && instance.m_port.nil? && instance.m_host.nil?
      instance.m_host = host
      instance.m_port = port
      instance.m_scrapername = scrapername
      instance.m_runid = runid
      instance.m_attachables = attachables
      instance.m_webstore_port = webstore_port
    elsif host && port
      raise "Can't change host and port once connection made"
    elsif !(instance.m_port) || !(instance.m_host)
      raise "Can't return a datastore without port/host information"
    end
    instance
  end
end
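SW_DataStore is a Singleton: the first call to SW_DataStore.create with a host and port records the connection settings, and every later no-argument call returns that same configured instance, which is how scraperwiki.rb reaches the dataproxy or webstore from anywhere. A minimal sketch (the host, port, scraper name and run id below are made-up values; on the real platform the execution wrapper performs this first call, not user code):

    require 'scraperwiki/datastore'

    # First call: wire up the singleton with connection details (all values hypothetical).
    SW_DataStore.create("127.0.0.1", 9003, "my_scraper", "run-001", [], 0)

    # Later calls without arguments hand back the same configured instance.
    ds = SW_DataStore.create()
    puts ds.m_scrapername   # => "my_scraper"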
data/lib/scraperwiki/stacktrace.rb
ADDED
@@ -0,0 +1,51 @@
def _get_stackentry(code_filename, code, filename, linenumber, funcname)
  nlinenumber = linenumber.to_i
  stackentry = {"file" => filename, "linenumber" => nlinenumber, "duplicates" => 1}

  if filename == "(eval)" or filename == code_filename
    codelines = code.split("\n")
    if (nlinenumber >= 1) && (nlinenumber <= codelines.size)
      stackentry["linetext"] = codelines[nlinenumber - 1]
    elsif (nlinenumber == codelines.size + 1)
      stackentry["linetext"] = "<end of file>"
    else
      stackentry["linetext"] = "getExceptionTraceback: ScraperWiki internal error, line %d out of range in file %s" % [nlinenumber, code_filename]
    end
    stackentry["file"] = "<string>"
  else
    # XXX bit of a hack to show the line number in third party libraries
    stackentry["file"] += ":" + linenumber
  end
  if funcname
    stackentry["furtherlinetext"] = funcname
  end
  return stackentry
end

def getExceptionTraceback(e, code, code_filename)
  lbacktrace = e.backtrace.reverse
  #File.open("/tmp/fairuby", 'a') {|f| f.write(JSON.generate(lbacktrace)) }

  exceptiondescription = e.to_s

  stackdump = []
  for l in lbacktrace
    (filename, linenumber, funcname) = l.split(":")

    next if filename.match(/\/exec.rb$/)   # skip showing stack of wrapper

    stackentry = _get_stackentry(code_filename, code, filename, linenumber, funcname)
    stackdump.push(stackentry)
  end

  if e.kind_of?(SyntaxError)
    (filename, linenumber, message) = exceptiondescription.split(/[:\n]/, 3)
    exceptiondescription = message

    stackentry = _get_stackentry(code_filename, code, filename, linenumber, nil)
    stackdump.push(stackentry)
  end

  return { 'message_type' => 'exception', 'exceptiondescription' => exceptiondescription, "stackdump" => stackdump }
end
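getExceptionTraceback turns a Ruby exception plus the scraper's source text into the structured 'exception' message the platform logs. A hedged sketch of how a runner might call it (the scraper.rb filename and the eval-based driver are assumptions in the spirit of the exec.rb wrapper the code skips over, not part of this gem):

    require 'json'
    require 'scraperwiki/stacktrace'

    code = File.read("scraper.rb")          # hypothetical scraper source file
    begin
      eval(code, binding, "scraper.rb")
    rescue Exception => e
      report = getExceptionTraceback(e, code, "scraper.rb")
      puts JSON.generate(report)            # message_type, exceptiondescription, stackdump
    end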
data/lib/version.rb
ADDED
metadata
ADDED
@@ -0,0 +1,98 @@
--- !ruby/object:Gem::Specification
name: scraperwiki
version: !ruby/object:Gem::Version
  hash: 21
  prerelease:
  segments:
  - 1
  - 0
  - 1
  version: 1.0.1
platform: ruby
authors:
- Francis Irving
autorequire:
bindir: bin
cert_chain: []
date: 2011-10-09 00:00:00 +02:00
default_executable:
dependencies:
- !ruby/object:Gem::Dependency
  name: json
  prerelease: false
  requirement: &id001 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        hash: 3
        segments:
        - 0
        version: "0"
  type: :runtime
  version_requirements: *id001
- !ruby/object:Gem::Dependency
  name: httpclient
  prerelease: false
  requirement: &id002 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        hash: 3
        segments:
        - 0
        version: "0"
  type: :runtime
  version_requirements: *id002
description: Ruby code used for accessing
email:
- francis@scraperwiki.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- lib/version.rb
- lib/scraperwiki.rb
- lib/scraperwiki/datastore.rb
- lib/scraperwiki/stacktrace.rb
has_rdoc: true
homepage: http://scraperwiki.com
licenses: []
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      hash: 3
      segments:
      - 0
      version: "0"
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      hash: 3
      segments:
      - 0
      version: "0"
requirements: []
rubyforge_project:
rubygems_version: 1.5.2
signing_key:
specification_version: 3
summary: ScraperWiki client library for Ruby
test_files: []
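The gemspec above pins nothing beyond its two runtime dependencies, json and httpclient (both ">= 0"), so pulling in this exact release from a project is just a Gemfile entry along these lines (illustrative):

    # Gemfile
    source 'https://rubygems.org'

    gem 'scraperwiki', '1.0.1'   # installs json and httpclient as runtime dependencies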