twittercrawler 0.0.2 → 0.0.3

Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/twittercrawler.rb +35 -2
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: b578c69314b844603dcc13e8968ce0ad5f11024b
-   data.tar.gz: 0f7df31fc66bad9e79d890c50b43ffcaccee4f63
+   metadata.gz: 25d1d3c957757eeca51247c704e42f9c009a8d09
+   data.tar.gz: 55bcea247d9d2c926b6fdafe51c876de6ed30049
  SHA512:
-   metadata.gz: 617241a871ac10423a4096a71b282217a8e53ce182b09df1cefe1ea3e625611a9f55b139cdd6429607d32583c49edf6e92c33c897e3c83f050c3bfbead1be7d5
-   data.tar.gz: 3f1039aa7975a23b8f284ea3f255c15ce2344d9196ca48d48f0a8d78a737d84eac1cc590135bf553e7360b34b20916cec2901d3ee325ba98263c9ab4be24be04
+   metadata.gz: a2bf6e5315fae1c511f63d94dd928c99998d7ae791983837fc9564e5e070ee4ca8afe73c2a5d959855e2e38706688db5e511abf208873bb1981bb3d2341835eb
+   data.tar.gz: 8495e3060c561ee3eb4885a6e8686a64256cd8a85a17562110030eff6c2da66765487f67e72370dd31ae07c350cd4633bdd798756c9abdac4f7addc69aa5908d
data/lib/twittercrawler.rb CHANGED
@@ -6,11 +6,15 @@ require 'nokogiri'
  load 'twitter_parser.rb'
 
  class TwitterCrawler
-   def initialize(search_term, operator, requests)
+   def initialize(search_term, operator, requests, cm_hash)
      @search_term = search_term
      @operator = operator
      @requests = requests
      @output = Array.new
+
+     # Handle crawler manager info
+     @cm_url = cm_hash[:crawler_manager_url] if cm_hash
+     @selector_id = cm_hash[:selector_id] if cm_hash
    end
 
    # Generate advanced query
@@ -36,9 +40,13 @@ class TwitterCrawler
 
      # Parse each tweet
      tweets.each do |tweet|
+       # Add tweet
        tweet_html = tweet.attribute("innerHTML")
        parser = TwitterParser.new(tweet_html)
-       @output.push(parser.parse_tweet)
+       parsed_tweet = parser.parse_tweet
+
+       # Report results
+       report_results([parsed_tweet], parsed_tweet[:tweet_link])
      end
    end
 
@@ -57,6 +65,31 @@ class TwitterCrawler
      end
    end
 
+   # Figure out how to report results
+   def report_results(results, link)
+     if @cm_url
+       report_incremental(results, link)
+     else
+       report_batch(results)
+     end
+   end
+
+   # Report all results in one JSON
+   def report_batch(results)
+     results.each do |result|
+       @output.push(result)
+     end
+   end
+
+   # Report results back to Harvester incrementally
+   def report_incremental(results, link)
+     curl_url = @cm_url + "/relay_results"
+     c = Curl::Easy.http_post(curl_url,
+           Curl::PostField.content('selector_id', @selector_id),
+           Curl::PostField.content('status_message', "Collected " + link),
+           Curl::PostField.content('results', JSON.pretty_generate(results)))
+   end
+
    # Generate JSON for output
    def gen_json
      JSON.pretty_generate(@output)
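
In practice, the new fourth constructor argument selects the reporting path: without a cm_hash, parsed tweets are buffered in @output and returned by gen_json; with one, each tweet is POSTed to <crawler_manager_url>/relay_results as it is collected. The sketch below only illustrates that split; the search term, operator, request count, crawler manager URL, and selector id are placeholder values, not taken from this diff.

require 'twittercrawler'

# Batch mode: no crawler manager, so report_results falls through to
# report_batch and tweets accumulate in @output until gen_json is called.
crawler = TwitterCrawler.new("transparency", nil, 10, nil)  # placeholder args
puts crawler.gen_json  # "[]" until a crawl has populated @output

# Incremental mode: report_results POSTs each parsed tweet to the
# crawler manager's /relay_results endpoint as it is collected.
cm_hash = {
  crawler_manager_url: "http://localhost:3000",  # placeholder URL
  selector_id: "12"                              # placeholder selector id
}
managed_crawler = TwitterCrawler.new("transparency", nil, 10, cm_hash)
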
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: twittercrawler
  version: !ruby/object:Gem::Version
-   version: 0.0.2
+   version: 0.0.3
  platform: ruby
  authors:
  - M. C. McGrath