scraperwiki 1.0.1 → 2.0.0

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
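In 2.0.0 the dataproxy/webstore plumbing (SW_DataStore, attach, select, save_var and friends) is removed: data is saved straight into a local SQLite database through the new scraperwiki/sqlite_save_info.rb helper, and ScraperWiki.scrape gains an optional user-agent argument. As a minimal sketch of the new scrape signature shown in the diff below (the URL, parameters and agent string are illustrative; the httpclient gem is assumed to be installed and loaded, since this file no longer requires it):

    require 'scraperwiki'

    # Plain GET; params and agent are optional.
    html = ScraperWiki.scrape('http://scraperwiki.com')

    # POST with form parameters and a manually supplied user agent (new in 2.0.0).
    page = ScraperWiki.scrape('http://example.com/search', {'q' => 'ruby'}, 'my-scraper/1.0')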
lib/scraperwiki.rb CHANGED
@@ -1,480 +1,124 @@
1
- require 'json'
2
- require 'uri'
3
- require 'net/http'
4
- require 'scraperwiki/datastore'
5
- require 'httpclient'
6
-
7
- class SqliteException < RuntimeError
8
- end
9
-
10
- class NoSuchTableSqliteException < SqliteException
11
- end
12
-
13
- $apiwrapperattacheddata = [ ]
1
+ require 'sqlite3'
2
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
3
+ require 'scraperwiki/sqlite_save_info.rb'
14
4
 
15
5
  module ScraperWiki
16
6
 
17
- $metadatamessagedone = false
18
- $attachlist = [ ]
19
-
20
- def ScraperWiki.dumpMessage(hash)
21
- msg = JSON.generate(hash)
22
- $logfd.write( "JSONRECORD(" + msg.length.to_s() + "):" + msg + "\n")
23
- $logfd.flush()
24
- end
25
-
26
- def ScraperWiki.httpresponseheader(headerkey, headervalue)
27
- ScraperWiki.dumpMessage({'message_type' => 'httpresponseheader', 'headerkey' => headerkey, 'headervalue' => headervalue})
28
- end
29
-
30
- def ScraperWiki.scrape(url, params = nil)
31
- client = HTTPClient.new
7
+ # The scrape method fetches the content from a webserver.
8
+ #
9
+ # === Parameters
10
+ #
11
+ # * _url_ = The URL to fetch
12
+ # * _params_ = The parameters to send with a POST request
13
+ # * _agent_ = A manually supplied user agent string
14
+ #
15
+ # === Example
16
+ # ScraperWiki::scrape('http://scraperwiki.com')
17
+ #
18
+ def ScraperWiki.scrape(url, params = nil, agent = nil)
19
+ if agent
20
+ client = HTTPClient.new(:agent_name => agent)
21
+ else
22
+ client = HTTPClient.new
23
+ end
32
24
  client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
25
+ if client.respond_to?("transparent_gzip_decompression=")
26
+ client.transparent_gzip_decompression = true
27
+ end
33
28
 
34
- if params.nil?
35
- return client.get_content(url)
29
+ if params.nil?
30
+ html = client.get_content(url)
36
31
  else
37
- return client.post_content(url, params)
32
+ html = client.post_content(url, params)
38
33
  end
39
- end
40
34
 
41
- def ScraperWiki.gb_postcode_to_latlng(postcode)
42
- uri = URI.parse("http://views.scraperwiki.com/run/uk_postcode_lookup/?postcode="+URI.escape(postcode))
43
- sres = Net::HTTP.get(uri)
44
- jres = JSON.parse(sres)
45
- if jres["lat"] and jres["lng"]
46
- return [jres["lat"], jres["lng"]]
35
+ unless client.respond_to?("transparent_gzip_decompression=")
36
+ begin
37
+ gz = Zlib::GzipReader.new(StringIO.new(html))
38
+ return gz.read
39
+ rescue
40
+ return html
47
41
  end
48
- return nil
42
+ end
+ html
49
43
  end
50
44
 
45
+ # Saves the provided data into a local database for this scraper. Data is upserted
46
+ # into this table (inserted if it does not exist, updated if the unique keys say it
47
+ # does).
48
+ #
49
+ # === Parameters
50
+ #
51
+ # * _unique_keys_ = A list of column names that, used together, should be unique
52
+ # * _data_ = A hash of the data, where the key is the column name and the value is the row
53
+ # value. If saving many rows at once, this can be an array of hashes.
54
+ # * _table_name_ = The name that the newly created table should use.
55
+ #
56
+ # === Example
57
+ # ScraperWiki::save_sqlite(['id'], {'id'=>1})
58
+ #
59
+ def ScraperWiki.save_sqlite(unique_keys, data, table_name="swdata")
60
+ raise 'unique_keys must be nil or an array' if unique_keys != nil && !unique_keys.kind_of?(Array)
61
+ raise 'data must have a non-nil value' if data == nil
51
62
 
63
+ # convert :symbols to "strings"
64
+ unique_keys = unique_keys.map { |x| x.kind_of?(Symbol) ? x.to_s : x } if unique_keys
52
65
 
53
- def ScraperWiki._unicode_truncate(string, size)
54
- # Stops 2 byte unicode characters from being chopped in half which kills JSON serializer
55
- string.scan(/./u)[0,size].join
56
- end
57
-
58
- def ScraperWiki.save(unique_keys, data, date=nil, latlng=nil, table_name="swdata")
59
- if unique_keys != nil && !unique_keys.kind_of?(Array)
60
- raise 'unique_keys must be nil or an array'
61
- end
62
- if data == nil
63
- raise 'data must have a non-nil value'
64
- end
65
-
66
- ds = SW_DataStore.create()
67
- ldata = data.dup
68
- if date != nil
69
- ldata["date"] = date
70
- end
71
- if latlng != nil
72
- ldata["latlng_lat"] = latlng[0]
73
- ldata["latlng_lng"] = latlng[1]
66
+ if data.class == Hash
67
+ data = [ data ]
68
+ elsif data.length == 0
69
+ return
74
70
  end
75
- return ScraperWiki.save_sqlite(unique_keys, ldata, table_name="swdata", verbose=2)
76
- end
77
-
78
71
 
79
- def ScraperWiki.sqliteexecute(sqlquery, data=nil, verbose=2)
80
- ds = SW_DataStore.create()
81
- if ds.m_webstore_port == 0
82
- res = ds.request({'maincommand'=>'sqliteexecute', 'sqlquery'=>sqlquery, 'data'=>data, 'attachlist'=>$attachlist})
83
- else
84
- username = 'resourcedir' # gets it into the right subdirectory automatically!!!
85
- dirscrapername = ds.m_scrapername
86
- if ds.m_scrapername == '' or ds.m_scrapername.nil?
87
- dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
88
- end
89
- path = "%s/%s" % [username, dirscrapername]
90
-
91
- record = {"query"=>sqlquery, "params"=>data, "attach"=>[]}
92
- $attachlist.each do |value|
93
- record["attach"].push({"user"=>username, "database"=>value["name"], "alias"=>value["asattach"], "securityhash"=>"somthing"})
94
- end
95
-
96
- httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
97
- headers = { "Accept"=>"application/json+tuples", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
98
- response = httpcall.put(path, JSON.generate(record), headers)
99
- res = JSON.parse(response.body)
100
- if res["state"] == "error"
101
- ScraperWiki.raisesqliteerror(res["message"])
102
- end
103
- if (res.class == Hash) and (res["keys"].class == Array) and (res["data"].class == Array)
104
- if res["keys"].include?("state") and (res["data"].length == 1)
105
- ddata = Hash[*res["keys"].zip(res["data"][0]).flatten]
106
- if ddata["state"] == "error"
107
- ScraperWiki.raisesqliteerror(ddata["message"])
108
- end
109
- end
110
- end
111
- end
72
+ rjdata = [ ]
73
+ for ldata in data
74
+ ljdata = _convdata(unique_keys, ldata)
75
+ rjdata.push(ljdata)
112
76
 
113
- if verbose
114
- if data.kind_of?(Array)
115
- data.each do |value|
116
- ldata = [ ]
117
- if value == nil
118
- value = ''
119
- end
120
- ldata.push(ScraperWiki._unicode_truncate(value.to_s, 50))
121
- end
122
- else
123
- ldata = data
124
- end
125
- ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"execute", 'val1'=>sqlquery, 'val2'=>ldata})
126
77
  end
127
- return res
128
- end
129
-
130
78
 
79
+ SQLiteMagic._do_save_sqlite(unique_keys, rjdata, table_name)
80
+ end
131
81
 
132
- # this ought to be a local function
82
+ # Internal function to check a row of data and convert it to the right format
133
83
  def ScraperWiki._convdata(unique_keys, scraper_data)
134
84
  if unique_keys
135
85
  for key in unique_keys
136
86
  if !key.kind_of?(String) and !key.kind_of?(Symbol)
137
- return { "error" => 'unique_keys must each be a string or a symbol', "bad_key" => key }
87
+ raise 'unique_keys must each be a string or a symbol, this one is not: ' + key.to_s
138
88
  end
139
89
  if !scraper_data.include?(key) and !scraper_data.include?(key.to_sym)
140
- return { "error" => 'unique_keys must be a subset of data', "bad_key" => key }
90
+ raise 'unique_keys must be a subset of data, this one is not: ' + key.to_s
141
91
  end
142
92
  if scraper_data[key] == nil and scraper_data[key.to_sym] == nil
143
- return { "error" => 'unique_key value should not be nil', "bad_key" => key }
93
+ raise 'unique_key value should not be nil, this one is nil: ' + key.to_s
144
94
  end
145
95
  end
146
96
  end
147
97
 
148
98
  jdata = { }
149
99
  scraper_data.each_pair do |key, value|
150
- if not key
151
- return { "error" => 'key must not be blank', "bad_key" => key }
152
- end
153
- if key.kind_of?(Symbol)
154
- key = key.to_s
155
- end
156
- if key.class != String
157
- return { "error" => 'key must be string type', "bad_key" => key }
158
- end
100
+ raise 'key must not have blank name' if not key
159
101
 
160
- if !/[a-zA-Z0-9_\- ]+$/.match(key)
161
- return { "error"=>'key must be simple text', "bad_key"=> key }
162
- end
102
+ key = key.to_s if key.kind_of?(Symbol)
103
+ raise 'key must be string or symbol type: ' + key if key.class != String
104
+ raise 'key must be simple text: ' + key if !/[a-zA-Z0-9_\- ]+$/.match(key)
163
105
 
164
- if value.kind_of?(Date)
106
+ # convert formats
107
+ if value.kind_of?(Date)
165
108
  value = value.iso8601
166
109
  end
167
110
  if value.kind_of?(Time)
168
111
  value = value.iso8601
169
- raise "internal error, timezone came out as non-UTC while converting to SQLite format" unless value.match(/\+00:00$/)
170
- value.gsub!(/\+00:00$/, '')
112
+ raise "internal error, timezone came out as non-UTC while converting to SQLite format" unless value.match(/([+-]00:00|Z)$/)
113
+ value.gsub!(/([+-]00:00|Z)$/, '')
171
114
  end
172
115
  if ![Fixnum, Float, String, TrueClass, FalseClass, NilClass].include?(value.class)
173
116
  value = value.to_s
174
117
  end
118
+
175
119
  jdata[key] = value
176
120
  end
177
121
  return jdata
178
122
  end
179
123
 
180
-
181
- def ScraperWiki.save_sqlite(unique_keys, data, table_name="swdata", verbose=2)
182
- if !data
183
- ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => "EMPTY SAVE IGNORED"})
184
- return
185
- end
186
-
187
- # convert :symbols to "strings"
188
- unique_keys = unique_keys.map { |x| x.kind_of?(Symbol) ? x.to_s : x }
189
-
190
- if data.class == Hash
191
- data = [ data ]
192
- end
193
-
194
- rjdata = [ ]
195
- for ldata in data
196
- ljdata = _convdata(unique_keys, ldata)
197
- if ljdata.include?("error")
198
- raise SqliteException.new(ljdata["error"])
199
- end
200
- rjdata.push(ljdata)
201
- end
202
-
203
- ds = SW_DataStore.create()
204
- if ds.m_webstore_port == 0
205
- res = ds.request({'maincommand'=>'save_sqlite', 'unique_keys'=>unique_keys, 'data'=>rjdata, 'swdatatblname'=>table_name})
206
- else
207
- username = 'resourcedir' # gets it into the right subdirectory automatically!!!
208
- dirscrapername = ds.m_scrapername
209
- if ds.m_scrapername == '' or ds.m_scrapername.nil?
210
- dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
211
- end
212
-
213
- # (do something about jargtypes later)
214
- qsl = [ ]
215
- unique_keys.each do |key|
216
- qsl.push("unique="+URI.encode(key))
217
- end
218
-
219
- # quick and dirty provision of column types to the webstore
220
- if rjdata.length != 0
221
- jargtypes = { }
222
- rjdata[0].each_pair do |k, v|
223
- if v != nil
224
- #if k[-5..-1] == "_blob"
225
- # vt = "blob" # coerced into affinity none
226
- if v.class == Fixnum
227
- vt = "integer"
228
- elsif v.class == Float
229
- vt = "real"
230
- else
231
- vt = "text"
232
- end
233
- jargtypes[k] = vt
234
- end
235
- end
236
- qsl.push(("jargtypes="+JSON.generate(jargtypes)))
237
- end
238
-
239
- path = "%s/%s/%s?%s" % [username, dirscrapername, table_name, qsl.join("&")]
240
- #puts JSON.generate(rjdata)
241
- httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
242
- headers = { "Accept"=>"application/json", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
243
- response = httpcall.post(path, JSON.generate(rjdata), headers)
244
- #puts response.body
245
- res = JSON.parse(response.body)
246
- if res["state"] == "error"
247
- res["error"] = res["message"]
248
- end
249
- end
250
-
251
-
252
- if res["error"]
253
- raise SqliteException.new(res["error"])
254
- end
255
-
256
- if verbose >= 2
257
- pdata = { }
258
- if rjdata.class == Hash
259
- sdata = rjdata
260
- else
261
- sdata = rjdata[0]
262
- end
263
- sdata.each_pair do |key, value|
264
- key = ScraperWiki._unicode_truncate(key.to_s, 50)
265
- if value == nil
266
- value = ''
267
- else
268
- value = ScraperWiki._unicode_truncate(value.to_s, 50)
269
- end
270
- pdata[key] = String(value)
271
- end
272
- if rjdata.class == Array and rjdata.size > 1
273
- pdata["number_records"] = "Number Records: "+String(rjdata.size)
274
- end
275
- ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => pdata})
276
- end
277
- return res
278
- end
279
-
280
- # also needs to handle the types better (could save json and datetime objects handily
281
- def ScraperWiki.save_var(name, value, verbose=2)
282
- vtype = String(value.class)
283
- svalue = value.to_s
284
- if vtype != "Fixnum" and vtype != "String" and vtype != "Float" and vtype != "NilClass"
285
- puts "*** object of type "+vtype+" converted to string\n"
286
- end
287
- data = { "name" => name, "value_blob" => svalue, "type" => vtype }
288
- ScraperWiki.save_sqlite(unique_keys=["name"], data=data, table_name="swvariables", verbose=verbose)
289
- end
290
-
291
- def ScraperWiki.get_var(name, default=nil, verbose=2)
292
- begin
293
- result = ScraperWiki.sqliteexecute("select value_blob, type from swvariables where name=?", [name], verbose)
294
- rescue NoSuchTableSqliteException => e
295
- return default
296
- end
297
-
298
- if !result.has_key?("data")
299
- return default
300
- end
301
-
302
- if result["data"].length == 0
303
- return default
304
- end
305
- # consider casting to type
306
- svalue = result["data"][0][0]
307
- vtype = result["data"][0][1]
308
- if vtype == "Fixnum"
309
- return svalue.to_i
310
- end
311
- if vtype == "Float"
312
- return svalue.to_f
313
- end
314
- if vtype == "NilClass"
315
- return nil
316
- end
317
- return svalue
318
- end
319
-
320
- # These are DEPRECATED and just here for compatibility
321
- def ScraperWiki.get_metadata(metadata_name, default = nil)
322
- if !$metadatamessagedone == nil
323
- puts "*** instead of get_metadata('"+metadata_name+"') please use\n get_var('"+metadata_name+"')"
324
- metadatamessagedone = true
325
- end
326
- result = ScraperWiki.get_var(metadata_name, default)
327
- return result
328
- end
329
-
330
- # These are DEPRECATED and just here for compatibility
331
- def ScraperWiki.save_metadata(metadata_name, value)
332
- if !$metadatamessagedone
333
- puts "*** instead of save_metadata('"+metadata_name+"') please use\n save_var('"+metadata_name+"')"
334
- $metadatamessagedone = true
335
- end
336
- return ScraperWiki.save_var(metadata_name, value)
337
- end
338
-
339
-
340
- def ScraperWiki.show_tables(dbname=nil)
341
- name = "sqlite_master"
342
- if dbname != nil
343
- name = "`"+dbname+"`.sqlite_master"
344
- end
345
- result = ScraperWiki.sqliteexecute("select tbl_name, sql from "+name+" where type='table'")
346
- #return result["data"]
347
- return (Hash[*result["data"].flatten]) # pre-1.8.7
348
- end
349
-
350
-
351
- def ScraperWiki.table_info(name)
352
- sname = name.split(".")
353
- if sname.length == 2
354
- result = ScraperWiki.sqliteexecute("PRAGMA %s.table_info(`%s`)" % sname)
355
- else
356
- result = ScraperWiki.sqliteexecute("PRAGMA table_info(`%s`)" % name)
357
- end
358
- res = [ ]
359
- for d in result["data"]
360
- res.push(Hash[*result["keys"].zip(d).flatten]) # pre-1.8.7
361
- end
362
- return res
363
- end
364
-
365
-
366
- def ScraperWiki.getDataByDate(name, start_date, end_date, limit=-1, offset=0)
367
- raise SqliteException.new("getDataByDate has been deprecated")
368
- end
369
-
370
- def ScraperWiki.getDataByLocation(name, lat, lng, limit=-1, offset=0)
371
- raise SqliteException.new("getDataByLocation has been deprecated")
372
- end
373
-
374
- def ScraperWiki.search(name, filterdict, limit=-1, offset=0)
375
- raise SqliteException.new("SW_APIWrapper.search has been deprecated")
376
- end
377
-
378
- def ScraperWiki.raisesqliteerror(rerror)
379
- if /sqlite3.Error: no such table:/.match(rerror) # old dataproxy
380
- raise NoSuchTableSqliteException.new(rerror)
381
- end
382
- if /DB Error: \(OperationalError\) no such table:/.match(rerror)
383
- raise NoSuchTableSqliteException.new(rerror)
384
- end
385
- raise SqliteException.new(rerror)
386
- end
387
-
388
- def ScraperWiki.attach(name, asname=nil, verbose=1)
389
- $attachlist.push({"name"=>name, "asname"=>asname})
390
-
391
- ds = SW_DataStore.create()
392
-
393
- if ds.m_webstore_port == 0
394
- res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"attach", 'name'=>name, 'asname'=>asname})
395
- if res["error"]
396
- ScraperWiki.raisesqliteerror(res)
397
- end
398
- else
399
- res = {'status'=>'ok'}
400
- end
401
-
402
- if verbose
403
- ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"attach", 'val1'=>name, 'val2'=>asname})
404
- end
405
-
406
- return res
407
- end
408
-
409
-
410
- def ScraperWiki.commit(verbose=1)
411
- ds = SW_DataStore.create()
412
- if ds.m_webstore_port == 0
413
- res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"commit"})
414
- else
415
- puts "*** commit() no longer a necessary function call"
416
- res = {'status'=>'ok'}
417
- end
418
- end
419
-
420
- def ScraperWiki.select(sqlquery, data=nil, verbose=1)
421
- if data != nil && sqlquery.scan(/\?/).length != 0 && data.class != Array
422
- data = [data]
423
- end
424
- result = ScraperWiki.sqliteexecute("select "+sqlquery, data, verbose)
425
- res = [ ]
426
- for d in result["data"]
427
- #res.push(Hash[result["keys"].zip(d)]) # post-1.8.7
428
- res.push(Hash[*result["keys"].zip(d).flatten]) # pre-1.8.7
429
- end
430
- return res
431
- end
432
-
433
- # old functions put back in for regression
434
- def ScraperWiki.getData(name, limit=-1, offset=0)
435
- if !$apiwrapperattacheddata.include?(name)
436
- puts "*** instead of getData('"+name+"') please use\n ScraperWiki.attach('"+name+"') \n print ScraperWiki.select('* from `"+name+"`.swdata')"
437
- ScraperWiki.attach(name)
438
- $apiwrapperattacheddata.push(name)
439
- end
440
-
441
- apilimit = 500
442
- g = Enumerator.new do |g|
443
- count = 0
444
- while true
445
- if limit == -1
446
- step = apilimit
447
- else
448
- step = apilimit < (limit - count) ? apilimit : limit - count
449
- end
450
- query = "* from `#{name}`.swdata limit #{step} offset #{offset+count}"
451
-
452
- records = ScraperWiki.select(query)
453
- for r in records
454
- g.yield r
455
- end
456
-
457
- count += records.length
458
- if records.length < step
459
- break
460
- end
461
- if limit != -1 and count >= limit
462
- break
463
- end
464
- end
465
- end
466
- end
467
-
468
- def ScraperWiki.getKeys(name)
469
- if !$apiwrapperattacheddata.include?(name)
470
- puts "*** instead of getKeys('"+name+"') please use\n ScraperWiki.attach('"+name+"') \n print ScraperWiki.sqliteexecute('select * from `"+name+"`.swdata limit 0')['keys']"
471
- ScraperWiki.attach(name)
472
- $apiwrapperattacheddata.push(name)
473
- end
474
- result = ScraperWiki.sqliteexecute("select * from `"+name+"`.swdata limit 0")
475
- if result.include?("error")
476
- raise SqliteException.new(result["error"])
477
- end
478
- return result["keys"]
479
- end
480
124
  end
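To round off the lib/scraperwiki.rb changes above, save_sqlite now upserts rows directly into a local SQLite table via SQLiteMagic rather than posting them to the webstore. A hedged sketch using the signature documented in the diff (table and column names are illustrative):

    rows = [
      {'id' => 1, 'name' => 'alpha'},
      {'id' => 2, 'name' => 'beta'}
    ]
    # Upsert both rows, keyed on the "id" column, into a table called "items"
    # instead of the default "swdata".
    ScraperWiki.save_sqlite(['id'], rows, 'items')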
metadata CHANGED
@@ -1,98 +1,45 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: scraperwiki
3
- version: !ruby/object:Gem::Version
4
- hash: 21
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
5
  prerelease:
6
- segments:
7
- - 1
8
- - 0
9
- - 1
10
- version: 1.0.1
11
6
  platform: ruby
12
- authors:
13
- - Francis Irving
7
+ authors:
8
+ - Francis Irving
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2011-10-09 00:00:00 +02:00
19
- default_executable:
20
- dependencies:
21
- - !ruby/object:Gem::Dependency
22
- name: json
23
- prerelease: false
24
- requirement: &id001 !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ">="
28
- - !ruby/object:Gem::Version
29
- hash: 3
30
- segments:
31
- - 0
32
- version: "0"
33
- type: :runtime
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
36
- name: httpclient
37
- prerelease: false
38
- requirement: &id002 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- hash: 3
44
- segments:
45
- - 0
46
- version: "0"
47
- type: :runtime
48
- version_requirements: *id002
49
- description: Ruby code used for accessing
50
- email:
51
- - francis@scraperwiki.com
12
+ date: 2013-04-04 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A library for scraping web pages and saving data easily
15
+ email: francis@scraperwiki.com
52
16
  executables: []
53
-
54
17
  extensions: []
55
-
56
18
  extra_rdoc_files: []
57
-
58
- files:
59
- - lib/version.rb
19
+ files:
60
20
  - lib/scraperwiki.rb
61
- - lib/scraperwiki/datastore.rb
62
- - lib/scraperwiki/stacktrace.rb
63
- has_rdoc: true
64
- homepage: http://scraperwiki.com
21
+ homepage: http://rubygems.org/gems/scraperwiki
65
22
  licenses: []
66
-
67
23
  post_install_message:
68
24
  rdoc_options: []
69
-
70
- require_paths:
25
+ require_paths:
71
26
  - lib
72
- required_ruby_version: !ruby/object:Gem::Requirement
27
+ required_ruby_version: !ruby/object:Gem::Requirement
73
28
  none: false
74
- requirements:
75
- - - ">="
76
- - !ruby/object:Gem::Version
77
- hash: 3
78
- segments:
79
- - 0
80
- version: "0"
81
- required_rubygems_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
82
34
  none: false
83
- requirements:
84
- - - ">="
85
- - !ruby/object:Gem::Version
86
- hash: 3
87
- segments:
88
- - 0
89
- version: "0"
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
90
39
  requirements: []
91
-
92
40
  rubyforge_project:
93
- rubygems_version: 1.5.2
41
+ rubygems_version: 1.8.23
94
42
  signing_key:
95
43
  specification_version: 3
96
- summary: ScraperWiki client library for Ruby
44
+ summary: ScraperWiki
97
45
  test_files: []
98
-
lib/scraperwiki/datastore.rb DELETED
@@ -1,109 +0,0 @@
1
- require 'json'
2
- require 'singleton'
3
- require 'thread'
4
- require 'cgi'
5
-
6
- # the python version of this makes use of a global static copy of the class
7
- # so the connection is made only once to the dataproxy
8
- # I think the Singleton module implements this magically
9
-
10
- class SW_DataStore
11
-
12
- @@lock = Mutex.new
13
-
14
- include Singleton
15
-
16
- attr_accessor :m_port, :m_host, :m_scrapername, :m_runid, :m_attachables, :m_webstore_port
17
-
18
- def initialize
19
- @m_socket = nil
20
- @m_host = nil
21
- @m_port = nil
22
- @m_scrapername = ''
23
- @m_runid = ''
24
- @m_attachables = []
25
- @webstore_port = 0
26
- end
27
-
28
-
29
- def ensure_connected
30
- # Connect to the data proxy. The data proxy will need to make an Ident call
31
- # back to get the scraperID. Since the data proxy may be on another machine
32
- # and the peer address it sees will have been subject to NAT or masquerading,
33
- # send the UML name and the socket port number in the request.
34
-
35
- if @m_socket == nil
36
- @m_socket = TCPSocket.open(@m_host, @m_port)
37
- proto, port, name, ip = @m_socket.addr()
38
- if @m_scrapername == '' or @m_scrapername.nil?
39
- sname = ''
40
- else
41
- sname = CGI::escape(@m_scrapername)
42
- end
43
- if @m_runid == '' or @m_runid.nil?
44
- rid = ''
45
- else
46
- rid = CGI::escape(@m_runid)
47
- end
48
-
49
- getmsg = "GET /?uml=%s&port=%s&vscrapername=%s&vrunid=%s HTTP/1.1\n\n" % ['lxc', port, sname, rid]
50
- @m_socket.send(getmsg, 0)
51
- @m_socket.flush()
52
-
53
- buffer = @m_socket.recv(1024)
54
- result = JSON.parse(buffer)
55
- if result["status"] != "good"
56
- raise result["status"]
57
- end
58
- end
59
- end
60
-
61
- def request (req)
62
- text = ''
63
- @@lock.synchronize {
64
- ensure_connected
65
- reqmsg = JSON.generate(req) + "\n"
66
-
67
- bytes_sent = 0
68
- while bytes_sent < reqmsg.length
69
- bytes_sent += @m_socket.send(reqmsg.slice(bytes_sent, reqmsg.length), 0)
70
- end
71
- @m_socket.flush()
72
-
73
- while true
74
- buffer = @m_socket.recv(1024)
75
- if buffer.length == 0
76
- break
77
- end
78
- text += buffer
79
- if text[-1] == "\n"[0]
80
- break
81
- end
82
- end
83
- }
84
- return JSON.parse(text)
85
- end
86
-
87
- # function used to both initialize the settings and get an instance!
88
- # this is ridiculous and unnecessary with new webstore.
89
- # we are creating object without the fields merely to access the static variables!
90
- def SW_DataStore.create(host=nil, port = nil, scrapername = '', runid = nil, attachables = nil, webstore_port = nil)
91
- instance = SW_DataStore.instance
92
- # so, it might be intended that the host and port are
93
- # set once, never to be changed, but this is ruby so
94
- # there's no way to guarantee that.
95
- if host && port && instance.m_port.nil? && instance.m_host.nil?
96
- instance.m_host = host
97
- instance.m_port = port
98
- instance.m_scrapername = scrapername
99
- instance.m_runid = runid
100
- instance.m_attachables = attachables
101
- instance.m_webstore_port = webstore_port
102
- elsif host && port
103
- raise "Can't change host and port once connection made"
104
- elsif !(instance.m_port) || !(instance.m_host)
105
- raise "Can't return a datastore without port/host information"
106
- end
107
- instance
108
- end
109
- end
lib/scraperwiki/stacktrace.rb DELETED
@@ -1,51 +0,0 @@
1
- def _get_stackentry(code_filename, code, filename, linenumber, funcname)
2
- nlinenumber = linenumber.to_i
3
- stackentry = {"file" => filename, "linenumber" => nlinenumber, "duplicates" => 1}
4
-
5
- if filename == "(eval)" or filename == code_filename
6
- codelines = code.split("\n")
7
- if (nlinenumber >= 1) && (nlinenumber <= codelines.size)
8
- stackentry["linetext"] = codelines[nlinenumber-1]
9
- elsif (nlinenumber == codelines.size + 1)
10
- stackentry["linetext"] = "<end of file>"
11
- else
12
- stackentry["linetext"] = "getExceptionTraceback: ScraperWiki internal error, line %d out of range in file %s" % [nlinenumber, code_filename]
13
- end
14
- stackentry["file"] = "<string>"
15
- else
16
- # XXX bit of a hack to show the line number in third party libraries
17
- stackentry["file"] += ":" + linenumber
18
- end
19
- if funcname
20
- stackentry["furtherlinetext"] = funcname
21
- end
22
- return stackentry
23
- end
24
-
25
- def getExceptionTraceback(e, code, code_filename)
26
- lbacktrace = e.backtrace.reverse
27
- #File.open("/tmp/fairuby", 'a') {|f| f.write(JSON.generate(lbacktrace)) }
28
-
29
- exceptiondescription = e.to_s
30
-
31
- stackdump = []
32
- for l in lbacktrace
33
- (filename, linenumber, funcname) = l.split(":")
34
-
35
- next if filename.match(/\/exec.rb$/) # skip showing stack of wrapper
36
-
37
- stackentry = _get_stackentry(code_filename, code, filename, linenumber, funcname)
38
- stackdump.push(stackentry)
39
- end
40
-
41
- if e.kind_of?(SyntaxError)
42
- (filename, linenumber, message) = exceptiondescription.split(/[:\n]/, 3)
43
- exceptiondescription = message
44
-
45
- stackentry = _get_stackentry(code_filename, code, filename, linenumber, nil)
46
- stackdump.push(stackentry)
47
- end
48
-
49
- return { 'message_type' => 'exception', 'exceptiondescription' => exceptiondescription, "stackdump" => stackdump }
50
- end
51
-
lib/version.rb DELETED
@@ -1,4 +0,0 @@
1
-
2
- module ScraperWiki
3
- VERSION = "1.0.1"
4
- end