jamnagar 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +3 -0
  4. data/.vagrant/machines/default/virtualbox/action_provision +1 -0
  5. data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
  6. data/.vagrant/machines/default/virtualbox/id +1 -0
  7. data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
  8. data/.vagrant/machines/default/virtualbox/private_key +27 -0
  9. data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
  10. data/Gemfile +4 -0
  11. data/Gemfile.lock +64 -0
  12. data/LICENSE.txt +22 -0
  13. data/README.md +31 -0
  14. data/Rakefile +2 -0
  15. data/UTF-8-test.txt +0 -0
  16. data/Vagrantfile +67 -0
  17. data/blinky_tests +3 -0
  18. data/bootstrap.sh +36 -0
  19. data/jamnagar.gemspec +33 -0
  20. data/lib/jamnagar/adapters/adapter.rb +18 -0
  21. data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
  22. data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
  23. data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
  24. data/lib/jamnagar/initializers/mongo.rb +6 -0
  25. data/lib/jamnagar/materials/item.rb +15 -0
  26. data/lib/jamnagar/materials/ore.rb +29 -0
  27. data/lib/jamnagar/producers/producer.rb +5 -0
  28. data/lib/jamnagar/producers/rss_producer.rb +6 -0
  29. data/lib/jamnagar/producers/twitter_producer.rb +6 -0
  30. data/lib/jamnagar/refineries/content_refinery.rb +46 -0
  31. data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
  32. data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
  33. data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
  34. data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
  35. data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
  36. data/lib/jamnagar/refiners/refiner.rb +22 -0
  37. data/lib/jamnagar/refiners/source_detail.rb +17 -0
  38. data/lib/jamnagar/refiners/url_expansion.rb +23 -0
  39. data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
  40. data/lib/jamnagar/storage/basic_store.rb +34 -0
  41. data/lib/jamnagar/storage/contributor_store.rb +35 -0
  42. data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
  43. data/lib/jamnagar/storage/item_store.rb +21 -0
  44. data/lib/jamnagar/storage/refined_item_store.rb +17 -0
  45. data/lib/jamnagar/storage/source_store.rb +35 -0
  46. data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
  47. data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
  48. data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
  49. data/lib/jamnagar/utilities/runner.rb +12 -0
  50. data/lib/jamnagar/utilities/silent_logger.rb +17 -0
  51. data/lib/jamnagar/utilities/url_expander.rb +47 -0
  52. data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
  53. data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
  54. data/lib/jamnagar/verifiers/verifier.rb +13 -0
  55. data/lib/jamnagar/version.rb +3 -0
  56. data/lib/jamnagar.rb +7 -0
  57. data/run.rb +49 -0
  58. data/sentinal +10 -0
  59. data/spec/basic_store_spec.rb +53 -0
  60. data/spec/content_refinement_spec.rb +74 -0
  61. data/spec/contributor_detail_refinment_spec.rb +26 -0
  62. data/spec/contributor_store_spec.rb +31 -0
  63. data/spec/duplicate_detector_spec.rb +26 -0
  64. data/spec/helpers.rb +92 -0
  65. data/spec/item_spec.rb +9 -0
  66. data/spec/item_store_spec.rb +18 -0
  67. data/spec/mongo_adapter_spec.rb +18 -0
  68. data/spec/popularity_incrementor_spec.rb +23 -0
  69. data/spec/producers_spec.rb +9 -0
  70. data/spec/refined_item_store_spec.rb +29 -0
  71. data/spec/refinements_spec.rb +118 -0
  72. data/spec/runner_spec.rb +8 -0
  73. data/spec/scenarios_spec.rb +4 -0
  74. data/spec/source_detail_refinment_spec.rb +24 -0
  75. data/spec/source_store_spec.rb +31 -0
  76. data/spec/spec_helper.rb +98 -0
  77. data/spec/url_expander_spec.rb +46 -0
  78. data/spec/utm_stripper_spec.rb +31 -0
  79. data/spec/utm_stripping_spec.rb +5 -0
  80. data/spec/verifications_spec.rb +22 -0
  81. data/tracer.rb +61 -0
  82. data/tweet_stream.json +1 -0
  83. metadata +288 -0
data/tracer.rb ADDED
@@ -0,0 +1,61 @@
+ require 'aws/kinesis'
+ require 'multi_json'
+ require 'twitter'
+
+ class SampleProducer
+   def initialize(data, service, stream_name, sleep_between_puts, shard_count=nil)
+     @data = data
+     @stream_name = stream_name
+     @shard_count = shard_count
+     @sleep_between_puts = sleep_between_puts
+     @kinesis = service
+   end
+
+   def run
+     @data.each do |record|
+       put_record(record)
+     end
+   end
+
+   def put_record(record)
+     data = record
+     data_blob = MultiJson.dump(data)
+     r = @kinesis.put_record(:stream_name => @stream_name,
+                             :data => data_blob,
+                             :partition_key => data[:id_str])
+     puts "Put record to shard '#{r[:shard_id]}' (#{r[:sequence_number]})"
+   end
+ end
+
+
+ AWS.config(
+   :access_key_id => 'AKIAIB2JQHAQAT4T4DVQ',
+   :secret_access_key => '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'
+ )
+
+ # Angler's Weekly
+ twitter_credentials = {:oauth_token=>"2602827816-rbQ93UFhoJHJZCoydSATlLZGGA8VdytcX5zNIM0", :oauth_token_secret=>"DNsZjcBFWZKOa1jEZRv08KwQymuhB69ObQHWNzn65FTXH", :consumer_secret=>"txblKWrEMSUTWjFeSNVdDC9XMkfwdEe49hjHSzLgEy46qqS9oX", :consumer_key=>"b3z9VvRtkUFbIvsXiMDzOUXjY"}
+
+ client = Twitter::REST::Client.new do |config|
+   config.consumer_key = twitter_credentials[:consumer_key]
+   config.consumer_secret = twitter_credentials[:consumer_secret]
+   config.access_token = twitter_credentials[:oauth_token]
+   config.access_token_secret = twitter_credentials[:oauth_token_secret]
+ end
+
+ puts "Getting Lists"
+ lists = client.lists('agileweekly')
+
+ puts "Getting Timelines for Lists"
+ tweets = lists.flat_map{|list| client.list_timeline(list, {:count => 199})}.uniq.compact
+
+ puts "Finding Tweets with URLs"
+ with_urls = tweets.select{|t| t.urls? }.compact
+
+ puts "Converting Tweets to Hashes"
+ data = with_urls.map{|tweet| tweet.to_h }
+
+ kinesis = AWS::Kinesis::Client.new({})
+ producer = SampleProducer.new(data, kinesis, "jamnagar_tweets", 0, 2)
+ puts "Sending to Kinesis"
+ producer.run
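
For readers skimming this diff, tracer.rb boils down to: fetch the timelines of a Twitter list's members, keep only tweets that carry URLs, serialize each one as JSON, and put it onto a Kinesis stream keyed by the tweet's id_str. The following sketch is not part of the published gem; it shows how the SampleProducer class above could be exercised locally with a stub standing in for AWS::Kinesis::Client. The StubKinesis class and the sample record are hypothetical, and it assumes the SampleProducer definition from tracer.rb and the multi_json gem are already loaded.

require 'multi_json'

# Hypothetical stand-in for the aws-sdk v1 Kinesis client: it accepts the same
# put_record(:stream_name, :data, :partition_key) call that SampleProducer makes
# and returns the :shard_id / :sequence_number keys the script prints.
class StubKinesis
  def put_record(opts)
    { :shard_id => "shardId-000000000000",
      :sequence_number => opts[:partition_key].to_s }
  end
end

# One tweet-shaped hash; :id_str is the partition key SampleProducer reads.
sample = { :id_str => "123456789", :text => "example tweet http://example.com" }

producer = SampleProducer.new([sample], StubKinesis.new, "jamnagar_tweets", 0)
producer.run   # prints: Put record to shard 'shardId-000000000000' (123456789)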