twittercrawler 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/twittercrawler.rb +35 -2
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b578c69314b844603dcc13e8968ce0ad5f11024b
-  data.tar.gz: 0f7df31fc66bad9e79d890c50b43ffcaccee4f63
+  metadata.gz: 25d1d3c957757eeca51247c704e42f9c009a8d09
+  data.tar.gz: 55bcea247d9d2c926b6fdafe51c876de6ed30049
 SHA512:
-  metadata.gz: 617241a871ac10423a4096a71b282217a8e53ce182b09df1cefe1ea3e625611a9f55b139cdd6429607d32583c49edf6e92c33c897e3c83f050c3bfbead1be7d5
-  data.tar.gz: 3f1039aa7975a23b8f284ea3f255c15ce2344d9196ca48d48f0a8d78a737d84eac1cc590135bf553e7360b34b20916cec2901d3ee325ba98263c9ab4be24be04
+  metadata.gz: a2bf6e5315fae1c511f63d94dd928c99998d7ae791983837fc9564e5e070ee4ca8afe73c2a5d959855e2e38706688db5e511abf208873bb1981bb3d2341835eb
+  data.tar.gz: 8495e3060c561ee3eb4885a6e8686a64256cd8a85a17562110030eff6c2da66765487f67e72370dd31ae07c350cd4633bdd798756c9abdac4f7addc69aa5908d
data/lib/twittercrawler.rb CHANGED
@@ -6,11 +6,15 @@ require 'nokogiri'
 load 'twitter_parser.rb'

 class TwitterCrawler
-  def initialize(search_term, operator, requests)
+  def initialize(search_term, operator, requests, cm_hash)
     @search_term = search_term
     @operator = operator
     @requests = requests
     @output = Array.new
+
+    # Handle crawler manager info
+    @cm_url = cm_hash[:crawler_manager_url] if cm_hash
+    @selector_id = cm_hash[:selector_id] if cm_hash
   end

   # Generate advanced query
@@ -36,9 +40,13 @@ class TwitterCrawler

     # Parse each tweet
     tweets.each do |tweet|
+      # Add tweet
       tweet_html = tweet.attribute("innerHTML")
       parser = TwitterParser.new(tweet_html)
-      @output.push(parser.parse_tweet)
+      parsed_tweet = parser.parse_tweet
+
+      # Report results
+      report_results([parsed_tweet], parsed_tweet[:tweet_link])
     end
   end

@@ -57,6 +65,31 @@ class TwitterCrawler
     end
   end

+  # Figure out how to report results
+  def report_results(results, link)
+    if @cm_url
+      report_incremental(results, link)
+    else
+      report_batch(results)
+    end
+  end
+
+  # Report all results in one JSON
+  def report_batch(results)
+    results.each do |result|
+      @output.push(result)
+    end
+  end
+
+  # Report results back to Harvester incrementally
+  def report_incremental(results, link)
+    curl_url = @cm_url + "/relay_results"
+    c = Curl::Easy.http_post(curl_url,
+          Curl::PostField.content('selector_id', @selector_id),
+          Curl::PostField.content('status_message', "Collected " + link),
+          Curl::PostField.content('results', JSON.pretty_generate(results)))
+  end
+
   # Generate JSON for output
   def gen_json
     JSON.pretty_generate(@output)
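
Taken together, the changes to data/lib/twittercrawler.rb add an optional crawler-manager hook: when a cm_hash is supplied, each parsed tweet is POSTed to the manager's /relay_results endpoint as it is collected (report_incremental); otherwise results are buffered in @output and exported via gen_json as before (report_batch). The sketch below illustrates both modes under assumptions: the crawl entry point itself is not part of this diff, the gem is assumed to load via require 'twittercrawler', and the search term, operator, request count, crawler-manager URL, and selector id are placeholder values, not part of this release.

```ruby
# Usage sketch only; all argument values are placeholders.
require 'twittercrawler'

# Batch mode: no cm_hash, so report_results falls through to report_batch
# and parsed tweets accumulate in @output until gen_json is called.
batch_crawler = TwitterCrawler.new("open source", "OR", 1, nil)

# Incremental mode: with a cm_hash, report_incremental POSTs each parsed
# tweet to <crawler_manager_url>/relay_results, along with the selector_id
# and a "Collected <tweet_link>" status message.
cm_hash = {
  crawler_manager_url: "http://localhost:9494", # placeholder URL
  selector_id: "twitter-demo"                   # placeholder id
}
incremental_crawler = TwitterCrawler.new("open source", "OR", 1, cm_hash)
```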
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: twittercrawler
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - M. C. McGrath