scraperwiki 1.0.1 → 2.0.0

lib/scraperwiki.rb CHANGED
@@ -1,480 +1,124 @@
- require 'json'
- require 'uri'
- require 'net/http'
- require 'scraperwiki/datastore'
- require 'httpclient'
-
- class SqliteException < RuntimeError
- end
-
- class NoSuchTableSqliteException < SqliteException
- end
-
- $apiwrapperattacheddata = [ ]
+ require 'sqlite3'
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
+ require 'scraperwiki/sqlite_save_info.rb'

  module ScraperWiki

- $metadatamessagedone = false
- $attachlist = [ ]
-
- def ScraperWiki.dumpMessage(hash)
- msg = JSON.generate(hash)
- $logfd.write( "JSONRECORD(" + msg.length.to_s() + "):" + msg + "\n")
- $logfd.flush()
- end
-
- def ScraperWiki.httpresponseheader(headerkey, headervalue)
- ScraperWiki.dumpMessage({'message_type' => 'httpresponseheader', 'headerkey' => headerkey, 'headervalue' => headervalue})
- end
-
- def ScraperWiki.scrape(url, params = nil)
- client = HTTPClient.new
+ # The scrape method fetches the content from a webserver.
+ #
+ # === Parameters
+ #
+ # * _url_ = The URL to fetch
+ # * _params_ = The parameters to send with a POST request
+ # * _agent = A manually supplied useragent string
+ #
+ # === Example
+ # ScraperWiki::scrape('http://scraperwiki.com')
+ #
+ def ScraperWiki.scrape(url, params = nil, agent = nil)
+ if agent
+ client = HTTPClient.new(:agent_name => agent)
+ else
+ client = HTTPClient.new
+ end
  client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
+ if HTTPClient.respond_to?("client.transparent_gzip_decompression=")
+ client.transparent_gzip_decompression = true
+ end

- if params.nil?
- return client.get_content(url)
+ if params.nil?
+ html = client.get_content(url)
  else
- return client.post_content(url, params)
+ html = client.post_content(url, params)
  end
- end

- def ScraperWiki.gb_postcode_to_latlng(postcode)
- uri = URI.parse("http://views.scraperwiki.com/run/uk_postcode_lookup/?postcode="+URI.escape(postcode))
- sres = Net::HTTP.get(uri)
- jres = JSON.parse(sres)
- if jres["lat"] and jres["lng"]
- return [jres["lat"], jres["lng"]]
+ unless HTTPClient.respond_to?("client.transparent_gzip_decompression=")
+ begin
+ gz = Zlib::GzipReader.new(StringIO.new(html))
+ return gz.read
+ rescue
+ return html
  end
- return nil
+ end
  end
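For orientation, here is a minimal usage sketch of the new signature, assuming the gem is required as 'scraperwiki'; the URL, form parameters and agent string are illustrative only:

    require 'scraperwiki'

    # Plain GET request
    html = ScraperWiki.scrape('http://example.com/stations')

    # POST request with form parameters and a manually supplied user agent
    html = ScraperWiki.scrape('http://example.com/search',
                              {'q' => 'fire stations'},
                              'my-scraper/1.0')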

+ # Saves the provided data into a local database for this scraper. Data is upserted
+ # into this table (inserted if it does not exist, updated if the unique keys say it
+ # does).
+ #
+ # === Parameters
+ #
+ # * _unique_keys_ = A list of column names, that used together should be unique
+ # * _data_ = A hash of the data where the Key is the column name, the Value the row
+ # value. If sending lots of data this can be a list of hashes.
+ # * _table_name_ = The name that the newly created table should use.
+ #
+ # === Example
+ # ScraperWiki::save(['id'], {'id'=>1})
+ #
+ def ScraperWiki.save_sqlite(unique_keys, data, table_name="swdata")
+ raise 'unique_keys must be nil or an array' if unique_keys != nil && !unique_keys.kind_of?(Array)
+ raise 'data must have a non-nil value' if data == nil

+ # convert :symbols to "strings"
+ unique_keys = unique_keys.map { |x| x.kind_of?(Symbol) ? x.to_s : x }

- def ScraperWiki._unicode_truncate(string, size)
- # Stops 2 byte unicode characters from being chopped in half which kills JSON serializer
- string.scan(/./u)[0,size].join
- end
-
- def ScraperWiki.save(unique_keys, data, date=nil, latlng=nil, table_name="swdata")
- if unique_keys != nil && !unique_keys.kind_of?(Array)
- raise 'unique_keys must be nil or an array'
- end
- if data == nil
- raise 'data must have a non-nil value'
- end
-
- ds = SW_DataStore.create()
- ldata = data.dup
- if date != nil
- ldata["date"] = date
- end
- if latlng != nil
- ldata["latlng_lat"] = latlng[0]
- ldata["latlng_lng"] = latlng[1]
+ if data.class == Hash
+ data = [ data ]
+ elsif data.length == 0
+ return
  end
- return ScraperWiki.save_sqlite(unique_keys, ldata, table_name="swdata", verbose=2)
- end
-

- def ScraperWiki.sqliteexecute(sqlquery, data=nil, verbose=2)
- ds = SW_DataStore.create()
- if ds.m_webstore_port == 0
- res = ds.request({'maincommand'=>'sqliteexecute', 'sqlquery'=>sqlquery, 'data'=>data, 'attachlist'=>$attachlist})
- else
- username = 'resourcedir' # gets it into the right subdirectory automatically!!!
- dirscrapername = ds.m_scrapername
- if ds.m_scrapername == '' or ds.m_scrapername.nil?
- dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
- end
- path = "%s/%s" % [username, dirscrapername]
-
- record = {"query"=>sqlquery, "params"=>data, "attach"=>[]}
- $attachlist.each do |value|
- record["attach"].push({"user"=>username, "database"=>value["name"], "alias"=>value["asattach"], "securityhash"=>"somthing"})
- end
-
- httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
- headers = { "Accept"=>"application/json+tuples", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
- response = httpcall.put(path, JSON.generate(record), headers)
- res = JSON.parse(response.body)
- if res["state"] == "error"
- ScraperWiki.raisesqliteerror(res["message"])
- end
- if (res.class == Hash) and (res["keys"].class == Array) and (res["data"].class == Array)
- if res["keys"].include?("state") and (res["data"].length == 1)
- ddata = Hash[*res["keys"].zip(res["data"][0]).flatten]
- if ddata["state"] == "error"
- ScraperWiki.raisesqliteerror(ddata["message"])
- end
- end
- end
- end
+ rjdata = [ ]
+ for ldata in data
+ ljdata = _convdata(unique_keys, ldata)
+ rjdata.push(ljdata)

- if verbose
- if data.kind_of?(Array)
- data.each do |value|
- ldata = [ ]
- if value == nil
- value = ''
- end
- ldata.push(ScraperWiki._unicode_truncate(value.to_s, 50))
- end
- else
- ldata = data
- end
- ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"execute", 'val1'=>sqlquery, 'val2'=>ldata})
  end
- return res
- end
-

+ SQLiteMagic._do_save_sqlite(unique_keys, rjdata, table_name)
+ end
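A short usage sketch of the signature documented above; the table and column names are illustrative only:

    require 'scraperwiki'

    # Upsert a single row, keyed on 'id', into the default 'swdata' table
    ScraperWiki.save_sqlite(['id'], {'id' => 1, 'name' => 'North Station'})

    # Upsert a batch of rows into a named table
    rows = [
      {'id' => 2, 'name' => 'South Station'},
      {'id' => 3, 'name' => 'East Station'}
    ]
    ScraperWiki.save_sqlite(['id'], rows, 'stations')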

- # this ought to be a local function
+ # Internal function to check a row of data, convert to right format
  def ScraperWiki._convdata(unique_keys, scraper_data)
  if unique_keys
  for key in unique_keys
  if !key.kind_of?(String) and !key.kind_of?(Symbol)
- return { "error" => 'unique_keys must each be a string or a symbol', "bad_key" => key }
+ return 'unique_keys must each be a string or a symbol, this one is not: ' + key
  end
  if !scraper_data.include?(key) and !scraper_data.include?(key.to_sym)
- return { "error" => 'unique_keys must be a subset of data', "bad_key" => key }
+ return 'unique_keys must be a subset of data, this one is not: ' + key
  end
  if scraper_data[key] == nil and scraper_data[key.to_sym] == nil
- return { "error" => 'unique_key value should not be nil', "bad_key" => key }
+ return 'unique_key value should not be nil, this one is nil: ' + key
  end
  end
  end

  jdata = { }
  scraper_data.each_pair do |key, value|
- if not key
- return { "error" => 'key must not be blank', "bad_key" => key }
- end
- if key.kind_of?(Symbol)
- key = key.to_s
- end
- if key.class != String
- return { "error" => 'key must be string type', "bad_key" => key }
- end
+ raise 'key must not have blank name' if not key

- if !/[a-zA-Z0-9_\- ]+$/.match(key)
- return { "error"=>'key must be simple text', "bad_key"=> key }
- end
+ key = key.to_s if key.kind_of?(Symbol)
+ raise 'key must be string or symbol type: ' + key if key.class != String
+ raise 'key must be simple text: ' + key if !/[a-zA-Z0-9_\- ]+$/.match(key)

- if value.kind_of?(Date)
+ # convert formats
+ if value.kind_of?(Date)
  value = value.iso8601
  end
  if value.kind_of?(Time)
  value = value.iso8601
- raise "internal error, timezone came out as non-UTC while converting to SQLite format" unless value.match(/\+00:00$/)
- value.gsub!(/\+00:00$/, '')
+ raise "internal error, timezone came out as non-UTC while converting to SQLite format" unless value.match(/([+-]00:00|Z)$/)
+ value.gsub!(/([+-]00:00|Z)$/, '')
  end
  if ![Fixnum, Float, String, TrueClass, FalseClass, NilClass].include?(value.class)
  value = value.to_s
  end
+
  jdata[key] = value
  end
  return jdata
  end
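In practice these conversion rules mean a caller can pass Date and Time values straight through to save_sqlite; a small sketch of the resulting stored text, with illustrative column names:

    require 'date'
    require 'scraperwiki'

    ScraperWiki.save_sqlite(['id'], {
      'id'      => 1,
      'seen_on' => Date.new(2013, 4, 4),          # stored as "2013-04-04"
      'seen_at' => Time.utc(2013, 4, 4, 12, 0, 0) # stored as "2013-04-04T12:00:00"
    })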

-
- def ScraperWiki.save_sqlite(unique_keys, data, table_name="swdata", verbose=2)
- if !data
- ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => "EMPTY SAVE IGNORED"})
- return
- end
-
- # convert :symbols to "strings"
- unique_keys = unique_keys.map { |x| x.kind_of?(Symbol) ? x.to_s : x }
-
- if data.class == Hash
- data = [ data ]
- end
-
- rjdata = [ ]
- for ldata in data
- ljdata = _convdata(unique_keys, ldata)
- if ljdata.include?("error")
- raise SqliteException.new(ljdata["error"])
- end
- rjdata.push(ljdata)
- end
-
- ds = SW_DataStore.create()
- if ds.m_webstore_port == 0
- res = ds.request({'maincommand'=>'save_sqlite', 'unique_keys'=>unique_keys, 'data'=>rjdata, 'swdatatblname'=>table_name})
- else
- username = 'resourcedir' # gets it into the right subdirectory automatically!!!
- dirscrapername = ds.m_scrapername
- if ds.m_scrapername == '' or ds.m_scrapername.nil?
- dirscrapername = 'DRAFT__' + ds.m_runid.gsub(/[\.\-]/, '_')
- end
-
- # (do something about jargtypes later)
- qsl = [ ]
- unique_keys.each do |key|
- qsl.push("unique="+URI.encode(key))
- end
-
- # quick and dirty provision of column types to the webstore
- if rjdata.length != 0
- jargtypes = { }
- rjdata[0].each_pair do |k, v|
- if v != nil
- #if k[-5..-1] == "_blob"
- # vt = "blob" # coerced into affinity none
- if v.class == Fixnum
- vt = "integer"
- elsif v.class == Float
- vt = "real"
- else
- vt = "text"
- end
- jargtypes[k] = vt
- end
- end
- qsl.push(("jargtypes="+JSON.generate(jargtypes)))
- end
-
- path = "%s/%s/%s?%s" % [username, dirscrapername, table_name, qsl.join("&")]
- #puts JSON.generate(rjdata)
- httpcall = Net::HTTP.new(ds.m_host, ds.m_webstore_port)
- headers = { "Accept"=>"application/json", "X-Scrapername"=>ds.m_scrapername, "X-Runid"=>ds.m_runid, "Content-Type"=>"application/json" }
- response = httpcall.post(path, JSON.generate(rjdata), headers)
- #puts response.body
- res = JSON.parse(response.body)
- if res["state"] == "error"
- res["error"] = res["message"]
- end
- end
-
-
- if res["error"]
- raise SqliteException.new(res["error"])
- end
-
- if verbose >= 2
- pdata = { }
- if rjdata.class == Hash
- sdata = rjdata
- else
- sdata = rjdata[0]
- end
- sdata.each_pair do |key, value|
- key = ScraperWiki._unicode_truncate(key.to_s, 50)
- if value == nil
- value = ''
- else
- value = ScraperWiki._unicode_truncate(value.to_s, 50)
- end
- pdata[key] = String(value)
- end
- if rjdata.class == Array and rjdata.size > 1
- pdata["number_records"] = "Number Records: "+String(rjdata.size)
- end
- ScraperWiki.dumpMessage({'message_type' => 'data', 'content' => pdata})
- end
- return res
- end
-
- # also needs to handle the types better (could save json and datetime objects handily
- def ScraperWiki.save_var(name, value, verbose=2)
- vtype = String(value.class)
- svalue = value.to_s
- if vtype != "Fixnum" and vtype != "String" and vtype != "Float" and vtype != "NilClass"
- puts "*** object of type "+vtype+" converted to string\n"
- end
- data = { "name" => name, "value_blob" => svalue, "type" => vtype }
- ScraperWiki.save_sqlite(unique_keys=["name"], data=data, table_name="swvariables", verbose=verbose)
- end
-
- def ScraperWiki.get_var(name, default=nil, verbose=2)
- begin
- result = ScraperWiki.sqliteexecute("select value_blob, type from swvariables where name=?", [name], verbose)
- rescue NoSuchTableSqliteException => e
- return default
- end
-
- if !result.has_key?("data")
- return default
- end
-
- if result["data"].length == 0
- return default
- end
- # consider casting to type
- svalue = result["data"][0][0]
- vtype = result["data"][0][1]
- if vtype == "Fixnum"
- return svalue.to_i
- end
- if vtype == "Float"
- return svalue.to_f
- end
- if vtype == "NilClass"
- return nil
- end
- return svalue
- end
-
- # These are DEPRECATED and just here for compatibility
- def ScraperWiki.get_metadata(metadata_name, default = nil)
- if !$metadatamessagedone == nil
- puts "*** instead of get_metadata('"+metadata_name+"') please use\n get_var('"+metadata_name+"')"
- metadatamessagedone = true
- end
- result = ScraperWiki.get_var(metadata_name, default)
- return result
- end
-
- # These are DEPRECATED and just here for compatibility
- def ScraperWiki.save_metadata(metadata_name, value)
- if !$metadatamessagedone
- puts "*** instead of save_metadata('"+metadata_name+"') please use\n save_var('"+metadata_name+"')"
- $metadatamessagedone = true
- end
- return ScraperWiki.save_var(metadata_name, value)
- end
-
-
- def ScraperWiki.show_tables(dbname=nil)
- name = "sqlite_master"
- if dbname != nil
- name = "`"+dbname+"`.sqlite_master"
- end
- result = ScraperWiki.sqliteexecute("select tbl_name, sql from "+name+" where type='table'")
- #return result["data"]
- return (Hash[*result["data"].flatten]) # pre-1.8.7
- end
-
-
- def ScraperWiki.table_info(name)
- sname = name.split(".")
- if sname.length == 2
- result = ScraperWiki.sqliteexecute("PRAGMA %s.table_info(`%s`)" % sname)
- else
- result = ScraperWiki.sqliteexecute("PRAGMA table_info(`%s`)" % name)
- end
- res = [ ]
- for d in result["data"]
- res.push(Hash[*result["keys"].zip(d).flatten]) # pre-1.8.7
- end
- return res
- end
-
-
- def ScraperWiki.getDataByDate(name, start_date, end_date, limit=-1, offset=0)
- raise SqliteException.new("getDataByDate has been deprecated")
- end
-
- def ScraperWiki.getDataByLocation(name, lat, lng, limit=-1, offset=0)
- raise SqliteException.new("getDataByLocation has been deprecated")
- end
-
- def ScraperWiki.search(name, filterdict, limit=-1, offset=0)
- raise SqliteException.new("SW_APIWrapper.search has been deprecated")
- end
-
- def ScraperWiki.raisesqliteerror(rerror)
- if /sqlite3.Error: no such table:/.match(rerror) # old dataproxy
- raise NoSuchTableSqliteException.new(rerror)
- end
- if /DB Error: \(OperationalError\) no such table:/.match(rerror)
- raise NoSuchTableSqliteException.new(rerror)
- end
- raise SqliteException.new(rerror)
- end
-
- def ScraperWiki.attach(name, asname=nil, verbose=1)
- $attachlist.push({"name"=>name, "asname"=>asname})
-
- ds = SW_DataStore.create()
-
- if ds.m_webstore_port == 0
- res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"attach", 'name'=>name, 'asname'=>asname})
- if res["error"]
- ScraperWiki.raisesqliteerror(res)
- end
- else
- res = {'status'=>'ok'}
- end
-
- if verbose
- ScraperWiki.dumpMessage({'message_type'=>'sqlitecall', 'command'=>"attach", 'val1'=>name, 'val2'=>asname})
- end
-
- return res
- end
-
-
- def ScraperWiki.commit(verbose=1)
- ds = SW_DataStore.create()
- if ds.m_webstore_port == 0
- res = ds.request({'maincommand'=>'sqlitecommand', 'command'=>"commit"})
- else
- puts "*** commit() no longer a necessary function call"
- res = {'status'=>'ok'}
- end
- end
-
- def ScraperWiki.select(sqlquery, data=nil, verbose=1)
- if data != nil && sqlquery.scan(/\?/).length != 0 && data.class != Array
- data = [data]
- end
- result = ScraperWiki.sqliteexecute("select "+sqlquery, data, verbose)
- res = [ ]
- for d in result["data"]
- #res.push(Hash[result["keys"].zip(d)]) # post-1.8.7
- res.push(Hash[*result["keys"].zip(d).flatten]) # pre-1.8.7
- end
- return res
- end
-
- # old functions put back in for regression
- def ScraperWiki.getData(name, limit=-1, offset=0)
- if !$apiwrapperattacheddata.include?(name)
- puts "*** instead of getData('"+name+"') please use\n ScraperWiki.attach('"+name+"') \n print ScraperWiki.select('* from `"+name+"`.swdata')"
- ScraperWiki.attach(name)
- $apiwrapperattacheddata.push(name)
- end
-
- apilimit = 500
- g = Enumerator.new do |g|
- count = 0
- while true
- if limit == -1
- step = apilimit
- else
- step = apilimit < (limit - count) ? apilimit : limit - count
- end
- query = "* from `#{name}`.swdata limit #{step} offset #{offset+count}"
-
- records = ScraperWiki.select(query)
- for r in records
- g.yield r
- end
-
- count += records.length
- if records.length < step
- break
- end
- if limit != -1 and count >= limit
- break
- end
- end
- end
- end
-
- def ScraperWiki.getKeys(name)
- if !$apiwrapperattacheddata.include?(name)
- puts "*** instead of getKeys('"+name+"') please use\n ScraperWiki.attach('"+name+"') \n print ScraperWiki.sqliteexecute('select * from `"+name+"`.swdata limit 0')['keys']"
- ScraperWiki.attach(name)
- $apiwrapperattacheddata.push(name)
- end
- result = ScraperWiki.sqliteexecute("select * from `"+name+"`.swdata limit 0")
- if result.include?("error")
- raise SqliteException.new(result["error"])
- end
- return result["keys"]
- end
  end
metadata CHANGED
@@ -1,98 +1,45 @@
- --- !ruby/object:Gem::Specification
+ --- !ruby/object:Gem::Specification
  name: scraperwiki
- version: !ruby/object:Gem::Version
- hash: 21
+ version: !ruby/object:Gem::Version
+ version: 2.0.0
  prerelease:
- segments:
- - 1
- - 0
- - 1
- version: 1.0.1
  platform: ruby
- authors:
- - Francis Irving
+ authors:
+ - Francis irving
  autorequire:
  bindir: bin
  cert_chain: []
-
- date: 2011-10-09 00:00:00 +02:00
- default_executable:
- dependencies:
- - !ruby/object:Gem::Dependency
- name: json
- prerelease: false
- requirement: &id001 !ruby/object:Gem::Requirement
- none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
- type: :runtime
- version_requirements: *id001
- - !ruby/object:Gem::Dependency
- name: httpclient
- prerelease: false
- requirement: &id002 !ruby/object:Gem::Requirement
- none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
- type: :runtime
- version_requirements: *id002
- description: Ruby code used for accessing
- email:
- - francis@scraperwiki.com
+ date: 2013-04-04 00:00:00.000000000 Z
+ dependencies: []
+ description: A library for scraping web pages and saving data easily
+ email: francis@scraperwiki.com
  executables: []
-
  extensions: []
-
  extra_rdoc_files: []
-
- files:
- - lib/version.rb
+ files:
  - lib/scraperwiki.rb
- - lib/scraperwiki/datastore.rb
- - lib/scraperwiki/stacktrace.rb
- has_rdoc: true
- homepage: http://scraperwiki.com
+ homepage: http://rubygems.org/gems/scraperwiki
  licenses: []
-
  post_install_message:
  rdoc_options: []
-
- require_paths:
+ require_paths:
  - lib
- required_ruby_version: !ruby/object:Gem::Requirement
+ required_ruby_version: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
- required_rubygems_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
  requirements: []
-
  rubyforge_project:
- rubygems_version: 1.5.2
+ rubygems_version: 1.8.23
  signing_key:
  specification_version: 3
- summary: ScraperWiki client library for Ruby
+ summary: ScraperWiki
  test_files: []
-
lib/scraperwiki/datastore.rb DELETED
@@ -1,109 +0,0 @@
- require 'json'
- require 'singleton'
- require 'thread'
- require 'cgi'
-
- # the python version of this makes use of a global static copy of the class
- # so the connection is made only once to the dataproxy
- # I think the Singleton module implements this magically
-
- class SW_DataStore
-
- @@lock = Mutex.new
-
- include Singleton
-
- attr_accessor :m_port, :m_host, :m_scrapername, :m_runid, :m_attachables, :m_webstore_port
-
- def initialize
- @m_socket = nil
- @m_host = nil
- @m_port = nil
- @m_scrapername = ''
- @m_runid = ''
- @m_attachables = []
- @webstore_port = 0
- end
-
-
- def ensure_connected
- # Connect to the data proxy. The data proxy will need to make an Ident call
- # back to get the scraperID. Since the data proxy may be on another machine
- # and the peer address it sees will have been subject to NAT or masquerading,
- # send the UML name and the socket port number in the request.
-
- if @m_socket == nil
- @m_socket = TCPSocket.open(@m_host, @m_port)
- proto, port, name, ip = @m_socket.addr()
- if @m_scrapername == '' or @m_scrapername.nil?
- sname = ''
- else
- sname = CGI::escape(@m_scrapername)
- end
- if @m_runid == '' or @m_runid.nil?
- rid = ''
- else
- rid = CGI::escape(@m_runid)
- end
-
- getmsg = "GET /?uml=%s&port=%s&vscrapername=%s&vrunid=%s HTTP/1.1\n\n" % ['lxc', port, sname, rid]
- @m_socket.send(getmsg, 0)
- @m_socket.flush()
-
- buffer = @m_socket.recv(1024)
- result = JSON.parse(buffer)
- if result["status"] != "good"
- raise result["status"]
- end
- end
- end
-
- def request (req)
- text = ''
- @@lock.synchronize {
- ensure_connected
- reqmsg = JSON.generate(req) + "\n"
-
- bytes_sent = 0
- while bytes_sent < reqmsg.length
- bytes_sent += @m_socket.send(reqmsg.slice(bytes_sent, reqmsg.length), 0)
- end
- @m_socket.flush()
-
- while true
- buffer = @m_socket.recv(1024)
- if buffer.length == 0
- break
- end
- text += buffer
- if text[-1] == "\n"[0]
- break
- end
- end
- }
- return JSON.parse(text)
- end
-
- # function used to both initialize the settings and get an instance!
- # this is ridiculous and unnecessary with new webstore.
- # we are creating object without the fields merely to access the static variables!
- def SW_DataStore.create(host=nil, port = nil, scrapername = '', runid = nil, attachables = nil, webstore_port = nil)
- instance = SW_DataStore.instance
- # so, it might be intended that the host and port are
- # set once, never to be changed, but this is ruby so
- # there's no way to guarantee that.
- if host && port && instance.m_port.nil? && instance.m_host.nil?
- instance.m_host = host
- instance.m_port = port
- instance.m_scrapername = scrapername
- instance.m_runid = runid
- instance.m_attachables = attachables
- instance.m_webstore_port = webstore_port
- elsif host && port
- raise "Can't change host and port once connection made"
- elsif !(instance.m_port) || !(instance.m_host)
- raise "Can't return a datastore without port/host information"
- end
- instance
- end
- end
lib/scraperwiki/stacktrace.rb DELETED
@@ -1,51 +0,0 @@
- def _get_stackentry(code_filename, code, filename, linenumber, funcname)
- nlinenumber = linenumber.to_i
- stackentry = {"file" => filename, "linenumber" => nlinenumber, "duplicates" => 1}
-
- if filename == "(eval)" or filename == code_filename
- codelines = code.split("\n")
- if (nlinenumber >= 1) && (nlinenumber <= codelines.size)
- stackentry["linetext"] = codelines[nlinenumber-1]
- elsif (nlinenumber == codelines.size + 1)
- stackentry["linetext"] = "<end of file>"
- else
- stackentry["linetext"] = "getExceptionTraceback: ScraperWiki internal error, line %d out of range in file %s" % [nlinenumber, code_filename]
- end
- stackentry["file"] = "<string>"
- else
- # XXX bit of a hack to show the line number in third party libraries
- stackentry["file"] += ":" + linenumber
- end
- if funcname
- stackentry["furtherlinetext"] = funcname
- end
- return stackentry
- end
-
- def getExceptionTraceback(e, code, code_filename)
- lbacktrace = e.backtrace.reverse
- #File.open("/tmp/fairuby", 'a') {|f| f.write(JSON.generate(lbacktrace)) }
-
- exceptiondescription = e.to_s
-
- stackdump = []
- for l in lbacktrace
- (filename, linenumber, funcname) = l.split(":")
-
- next if filename.match(/\/exec.rb$/) # skip showing stack of wrapper
-
- stackentry = _get_stackentry(code_filename, code, filename, linenumber, funcname)
- stackdump.push(stackentry)
- end
-
- if e.kind_of?(SyntaxError)
- (filename, linenumber, message) = exceptiondescription.split(/[:\n]/, 3)
- exceptiondescription = message
-
- stackentry = _get_stackentry(code_filename, code, filename, linenumber, nil)
- stackdump.push(stackentry)
- end
-
- return { 'message_type' => 'exception', 'exceptiondescription' => exceptiondescription, "stackdump" => stackdump }
- end
-
lib/version.rb DELETED
@@ -1,4 +0,0 @@
-
- module ScraperWiki
- VERSION = "1.0.1"
- end