jamnagar 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.vagrant/machines/default/virtualbox/action_provision +1 -0
- data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
- data/.vagrant/machines/default/virtualbox/id +1 -0
- data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
- data/.vagrant/machines/default/virtualbox/private_key +27 -0
- data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +64 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/UTF-8-test.txt +0 -0
- data/Vagrantfile +67 -0
- data/blinky_tests +3 -0
- data/bootstrap.sh +36 -0
- data/jamnagar.gemspec +33 -0
- data/lib/jamnagar/adapters/adapter.rb +18 -0
- data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
- data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
- data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
- data/lib/jamnagar/initializers/mongo.rb +6 -0
- data/lib/jamnagar/materials/item.rb +15 -0
- data/lib/jamnagar/materials/ore.rb +29 -0
- data/lib/jamnagar/producers/producer.rb +5 -0
- data/lib/jamnagar/producers/rss_producer.rb +6 -0
- data/lib/jamnagar/producers/twitter_producer.rb +6 -0
- data/lib/jamnagar/refineries/content_refinery.rb +46 -0
- data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
- data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
- data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
- data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
- data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
- data/lib/jamnagar/refiners/refiner.rb +22 -0
- data/lib/jamnagar/refiners/source_detail.rb +17 -0
- data/lib/jamnagar/refiners/url_expansion.rb +23 -0
- data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
- data/lib/jamnagar/storage/basic_store.rb +34 -0
- data/lib/jamnagar/storage/contributor_store.rb +35 -0
- data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
- data/lib/jamnagar/storage/item_store.rb +21 -0
- data/lib/jamnagar/storage/refined_item_store.rb +17 -0
- data/lib/jamnagar/storage/source_store.rb +35 -0
- data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
- data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
- data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
- data/lib/jamnagar/utilities/runner.rb +12 -0
- data/lib/jamnagar/utilities/silent_logger.rb +17 -0
- data/lib/jamnagar/utilities/url_expander.rb +47 -0
- data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
- data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
- data/lib/jamnagar/verifiers/verifier.rb +13 -0
- data/lib/jamnagar/version.rb +3 -0
- data/lib/jamnagar.rb +7 -0
- data/run.rb +49 -0
- data/sentinal +10 -0
- data/spec/basic_store_spec.rb +53 -0
- data/spec/content_refinement_spec.rb +74 -0
- data/spec/contributor_detail_refinment_spec.rb +26 -0
- data/spec/contributor_store_spec.rb +31 -0
- data/spec/duplicate_detector_spec.rb +26 -0
- data/spec/helpers.rb +92 -0
- data/spec/item_spec.rb +9 -0
- data/spec/item_store_spec.rb +18 -0
- data/spec/mongo_adapter_spec.rb +18 -0
- data/spec/popularity_incrementor_spec.rb +23 -0
- data/spec/producers_spec.rb +9 -0
- data/spec/refined_item_store_spec.rb +29 -0
- data/spec/refinements_spec.rb +118 -0
- data/spec/runner_spec.rb +8 -0
- data/spec/scenarios_spec.rb +4 -0
- data/spec/source_detail_refinment_spec.rb +24 -0
- data/spec/source_store_spec.rb +31 -0
- data/spec/spec_helper.rb +98 -0
- data/spec/url_expander_spec.rb +46 -0
- data/spec/utm_stripper_spec.rb +31 -0
- data/spec/utm_stripping_spec.rb +5 -0
- data/spec/verifications_spec.rb +22 -0
- data/tracer.rb +61 -0
- data/tweet_stream.json +1 -0
- metadata +288 -0
data/tracer.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'aws/kinesis'
|
2
|
+
require 'multi_json'
|
3
|
+
require 'twitter'
|
4
|
+
|
5
|
+
# Sends a collection of records to a Kinesis stream, one put per record.
#
# The service object only has to respond to +put_record+ taking
# +:stream_name+, +:data+ and +:partition_key+, and return something that
# can be indexed with +:shard_id+ and +:sequence_number+.
class SampleProducer
  # @param data [#each] records to send; each one is serialised with MultiJson
  # @param service [#put_record] Kinesis client (or a compatible object)
  # @param stream_name [String] name of the target stream
  # @param sleep_between_puts [Numeric, nil] seconds to pause between two
  #   consecutive puts; 0, nil or a non-numeric value disables the pause
  # @param shard_count [Integer, nil] stored, but not used by this class
  def initialize(data, service, stream_name, sleep_between_puts, shard_count=nil)
    @data = data
    @stream_name = stream_name
    @shard_count = shard_count
    @sleep_between_puts = sleep_between_puts
    @kinesis = service
  end

  # Puts every record in order, pausing between consecutive puts.
  #
  # @return [Object] whatever +@data.each+ returns (the collection itself
  #   for an Array)
  def run
    first = true
    @data.each do |record|
      # Pause only *between* puts: never before the first or after the last.
      pause_between_puts unless first
      first = false
      put_record(record)
    end
  end

  # Serialises one record to JSON, sends it, and prints the shard and
  # sequence number reported by the service.
  #
  # @param record [Hash] must carry the partition key under +:id_str+
  #   (or +'id_str'+ when the hash uses string keys)
  # @return [nil]
  def put_record(record)
    data_blob = MultiJson.dump(record)
    r = @kinesis.put_record(:stream_name => @stream_name,
                            :data => data_blob,
                            :partition_key => partition_key_for(record))
    puts "Put record to shard '#{r[:shard_id]}' (#{r[:sequence_number]})"
  end

  private

  # Looks the partition key up under the symbol key first, then the string key.
  def partition_key_for(record)
    record[:id_str] || record['id_str']
  end

  # Sleeps for the configured delay when it is a positive number.
  def pause_between_puts
    delay = @sleep_between_puts
    sleep(delay) if delay.is_a?(Numeric) && delay > 0
  end
end
|
29
|
+
|
30
|
+
|
31
|
+
# Credentials are read from the environment so that no secrets live in the
# source tree. ENV.fetch raises KeyError naming the variable when it is unset.
# NOTE(review): the keys that used to be embedded here were shipped inside a
# publicly released package and should be treated as compromised and rotated.
AWS.config(
  :access_key_id => ENV.fetch('AWS_ACCESS_KEY_ID'),
  :secret_access_key => ENV.fetch('AWS_SECRET_ACCESS_KEY')
)

# Angler's Weekly
# (label kept from the original; presumably the Twitter app these
# credentials belong to -- TODO confirm)
twitter_credentials = {
  :oauth_token => ENV.fetch('TWITTER_OAUTH_TOKEN'),
  :oauth_token_secret => ENV.fetch('TWITTER_OAUTH_TOKEN_SECRET'),
  :consumer_secret => ENV.fetch('TWITTER_CONSUMER_SECRET'),
  :consumer_key => ENV.fetch('TWITTER_CONSUMER_KEY')
}

client = Twitter::REST::Client.new do |config|
  config.consumer_key = twitter_credentials[:consumer_key]
  config.consumer_secret = twitter_credentials[:consumer_secret]
  config.access_token = twitter_credentials[:oauth_token]
  config.access_token_secret = twitter_credentials[:oauth_token_secret]
end

puts "Getting Lists"
lists = client.lists('agileweekly')

puts "Getting Timelines for Lists"
# A tweet can appear in several lists, hence the uniq.
tweets = lists.flat_map { |list| client.list_timeline(list, {:count => 199}) }.uniq.compact

puts "Finding Tweets with URLs"
# nils were already removed above, so no second compact is needed here.
with_urls = tweets.select { |t| t.urls? }

puts "Converting Tweets to Hashes"
data = with_urls.map { |tweet| tweet.to_h }

# Picks up the credentials configured through AWS.config above.
kinesis = AWS::Kinesis::Client.new({})
producer = SampleProducer.new(data, kinesis, "jamnagar_tweets", 0, 2)
puts "Sending to Kinesis"
producer.run
|