harvesterreporter 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/harvesterreporter.rb +42 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f73fb3f8cd5bded2829d4d442174aebe3847a4d9
4
+ data.tar.gz: 1b39ee438b65279bd66847eb2493a75cf2afc961
5
+ SHA512:
6
+ metadata.gz: 61cb9222dc77d8a5acbcaf8d534bdae945428b9e5512ab2812c16a51abd90553453242afbc6c867bee5d1034caf958035ef502e218ab3dd11019ec11c06a6493
7
+ data.tar.gz: 47f20585fbfce28c29c5205ea4b12fa407e5c1c757af05b7beb4cb89bbf714144b855a0d1d58f3ad8a78c54b4a9f7e52bf27dfd66a248f4d14bf6371e8a30bec
@@ -0,0 +1,42 @@
1
+ require 'curb'
2
+ require 'json'
3
+
4
# Reports crawler results in one of two ways:
# * incrementally, by POSTing each set of results to a crawler manager
#   (used when a :crawler_manager_url was supplied), or
# * in batch, by accumulating results locally so they can be dumped as a
#   single JSON document with #gen_json.
class HarvesterReporter
  # @param cm_hash [Hash, nil] optional settings. When given, the
  #   :crawler_manager_url and :selector_id entries are read from it.
  def initialize(cm_hash = nil)
    settings = cm_hash || {}
    @cm_url = settings[:crawler_manager_url]
    @selector_id = settings[:selector_id]

    # Output array for when Harvester isn't used
    @output = []
  end

  # Figure out how to report results: incrementally when a crawler manager
  # URL is configured, otherwise into the local batch.
  #
  # @param results [Enumerable, nil] results collected for +link+
  # @param link [Object] the link the results came from (only used in the
  #   status message of an incremental report)
  def report_results(results, link)
    if @cm_url
      report_incremental(results, link)
    else
      report_batch(results)
    end
  end

  # Append all results to the local output, to be emitted in one JSON.
  #
  # @param results [Enumerable, nil] results to store; nil is treated as
  #   "nothing to report"
  # @return [Enumerable, nil] the +results+ argument, unchanged
  def report_batch(results)
    # nil.to_a is [], so an empty crawl is a no-op instead of a NoMethodError
    @output.concat(results.to_a)
    results
  end

  # Report results back to Harvester incrementally by POSTing them to the
  # crawler manager's /relay_results endpoint.
  #
  # @param results [Object] anything JSON.pretty_generate can serialize
  # @param link [Object] interpolated into the status message, so nil or
  #   non-String values no longer raise TypeError
  # @return [Object] whatever Curl::Easy.http_post returns
  def report_incremental(results, link)
    # Drop a trailing slash so the endpoint never becomes ".../​/relay_results"
    curl_url = "#{@cm_url.to_s.chomp('/')}/relay_results"

    Curl::Easy.http_post(
      curl_url,
      Curl::PostField.content('selector_id', @selector_id),
      Curl::PostField.content('status_message', "Collected #{link}"),
      Curl::PostField.content('results', JSON.pretty_generate(results))
    )
  end

  # Output JSON for everything collected via #report_batch.
  #
  # @return [String] pretty-printed JSON array
  def gen_json
    JSON.pretty_generate(@output)
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: harvesterreporter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - M. C. McGrath
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-03-06 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Incremental result reporting for Transparency Toolkit
14
+ email: shidash@shidash.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/harvesterreporter.rb
20
+ homepage: https://github.com/TransparencyToolkit/HarvesterReporter
21
+ licenses:
22
+ - GPL
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.8
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Reports results from crawlers to Harvester
44
+ test_files: []
45
+ has_rdoc: