harvesterreporter 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/harvesterreporter.rb +42 -0
  3. metadata +45 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f73fb3f8cd5bded2829d4d442174aebe3847a4d9
4
+ data.tar.gz: 1b39ee438b65279bd66847eb2493a75cf2afc961
5
+ SHA512:
6
+ metadata.gz: 61cb9222dc77d8a5acbcaf8d534bdae945428b9e5512ab2812c16a51abd90553453242afbc6c867bee5d1034caf958035ef502e218ab3dd11019ec11c06a6493
7
+ data.tar.gz: 47f20585fbfce28c29c5205ea4b12fa407e5c1c757af05b7beb4cb89bbf714144b855a0d1d58f3ad8a78c54b4a9f7e52bf27dfd66a248f4d14bf6371e8a30bec
data/lib/harvesterreporter.rb ADDED
@@ -0,0 +1,42 @@
1
+ require 'curb'
2
+ require 'json'
3
+
4
# Reports crawler results either incrementally to a Harvester crawler manager
# over HTTP, or by buffering them locally so they can be dumped as one JSON
# document with #gen_json.
class HarvesterReporter
  # @param cm_hash [Hash, nil] optional settings. The :crawler_manager_url and
  #   :selector_id keys are read from it. When it is omitted (or has no
  #   :crawler_manager_url), results are buffered instead of being posted.
  def initialize(cm_hash = nil)
    cm_hash ||= {}
    @cm_url = cm_hash[:crawler_manager_url]
    @selector_id = cm_hash[:selector_id]

    # Output array for when Harvester isn't used
    @output = []
  end

  # Figure out how to report results: post them right away when a crawler
  # manager URL was configured, otherwise add them to the local buffer.
  #
  # @param results [#each, nil] the results collected for +link+
  # @param link [#to_s] what the results were collected from; only used in
  #   the status message of an incremental report
  def report_results(results, link)
    if @cm_url
      report_incremental(results, link)
    else
      report_batch(results)
    end
  end

  # Buffer results so they can all be reported in one JSON via #gen_json.
  #
  # @param results [#each, nil] items to append; nil is treated as
  #   "nothing collected" rather than raising NoMethodError
  # @return [Object, nil] +results+ itself, or nil when +results+ is nil
  def report_batch(results)
    return if results.nil?

    results.each { |result| @output << result }
  end

  # Report results back to Harvester incrementally by POSTing the
  # selector_id, a status_message and the JSON-encoded results to
  # "<crawler_manager_url>/relay_results".
  #
  # @param results [Object] anything JSON.pretty_generate can serialise
  # @param link [#to_s] included in the status message
  # @return [Object] whatever Curl::Easy.http_post returns
  def report_incremental(results, link)
    relay_url = "#{@cm_url}/relay_results"

    # Interpolation (rather than String#+) keeps a non-String link such as a
    # URI, Symbol or nil from raising TypeError.
    Curl::Easy.http_post(
      relay_url,
      Curl::PostField.content('selector_id', @selector_id),
      Curl::PostField.content('status_message', "Collected #{link}"),
      Curl::PostField.content('results', JSON.pretty_generate(results))
    )
  end

  # Output JSON
  #
  # @return [String] pretty-printed JSON array of every buffered result
  def gen_json
    JSON.pretty_generate(@output)
  end
end
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: harvesterreporter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - M. C. McGrath
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-03-06 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Incremental result reporting for Transparency Toolkit
14
+ email: shidash@shidash.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/harvesterreporter.rb
20
+ homepage: https://github.com/TransparencyToolkit/HarvesterReporter
21
+ licenses:
22
+ - GPL
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.8
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: Reports results from crawlers to Harvester
44
+ test_files: []
45
+ has_rdoc: