jamnagar 1.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rspec +3 -0
  4. data/.vagrant/machines/default/virtualbox/action_provision +1 -0
  5. data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
  6. data/.vagrant/machines/default/virtualbox/id +1 -0
  7. data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
  8. data/.vagrant/machines/default/virtualbox/private_key +27 -0
  9. data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
  10. data/Gemfile +4 -0
  11. data/Gemfile.lock +64 -0
  12. data/LICENSE.txt +22 -0
  13. data/README.md +31 -0
  14. data/Rakefile +2 -0
  15. data/UTF-8-test.txt +0 -0
  16. data/Vagrantfile +67 -0
  17. data/blinky_tests +3 -0
  18. data/bootstrap.sh +36 -0
  19. data/jamnagar.gemspec +33 -0
  20. data/lib/jamnagar/adapters/adapter.rb +18 -0
  21. data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
  22. data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
  23. data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
  24. data/lib/jamnagar/initializers/mongo.rb +6 -0
  25. data/lib/jamnagar/materials/item.rb +15 -0
  26. data/lib/jamnagar/materials/ore.rb +29 -0
  27. data/lib/jamnagar/producers/producer.rb +5 -0
  28. data/lib/jamnagar/producers/rss_producer.rb +6 -0
  29. data/lib/jamnagar/producers/twitter_producer.rb +6 -0
  30. data/lib/jamnagar/refineries/content_refinery.rb +46 -0
  31. data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
  32. data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
  33. data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
  34. data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
  35. data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
  36. data/lib/jamnagar/refiners/refiner.rb +22 -0
  37. data/lib/jamnagar/refiners/source_detail.rb +17 -0
  38. data/lib/jamnagar/refiners/url_expansion.rb +23 -0
  39. data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
  40. data/lib/jamnagar/storage/basic_store.rb +34 -0
  41. data/lib/jamnagar/storage/contributor_store.rb +35 -0
  42. data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
  43. data/lib/jamnagar/storage/item_store.rb +21 -0
  44. data/lib/jamnagar/storage/refined_item_store.rb +17 -0
  45. data/lib/jamnagar/storage/source_store.rb +35 -0
  46. data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
  47. data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
  48. data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
  49. data/lib/jamnagar/utilities/runner.rb +12 -0
  50. data/lib/jamnagar/utilities/silent_logger.rb +17 -0
  51. data/lib/jamnagar/utilities/url_expander.rb +47 -0
  52. data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
  53. data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
  54. data/lib/jamnagar/verifiers/verifier.rb +13 -0
  55. data/lib/jamnagar/version.rb +3 -0
  56. data/lib/jamnagar.rb +7 -0
  57. data/run.rb +49 -0
  58. data/sentinal +10 -0
  59. data/spec/basic_store_spec.rb +53 -0
  60. data/spec/content_refinement_spec.rb +74 -0
  61. data/spec/contributor_detail_refinment_spec.rb +26 -0
  62. data/spec/contributor_store_spec.rb +31 -0
  63. data/spec/duplicate_detector_spec.rb +26 -0
  64. data/spec/helpers.rb +92 -0
  65. data/spec/item_spec.rb +9 -0
  66. data/spec/item_store_spec.rb +18 -0
  67. data/spec/mongo_adapter_spec.rb +18 -0
  68. data/spec/popularity_incrementor_spec.rb +23 -0
  69. data/spec/producers_spec.rb +9 -0
  70. data/spec/refined_item_store_spec.rb +29 -0
  71. data/spec/refinements_spec.rb +118 -0
  72. data/spec/runner_spec.rb +8 -0
  73. data/spec/scenarios_spec.rb +4 -0
  74. data/spec/source_detail_refinment_spec.rb +24 -0
  75. data/spec/source_store_spec.rb +31 -0
  76. data/spec/spec_helper.rb +98 -0
  77. data/spec/url_expander_spec.rb +46 -0
  78. data/spec/utm_stripper_spec.rb +31 -0
  79. data/spec/utm_stripping_spec.rb +5 -0
  80. data/spec/verifications_spec.rb +22 -0
  81. data/tracer.rb +61 -0
  82. data/tweet_stream.json +1 -0
  83. metadata +288 -0
@@ -0,0 +1,22 @@
module Jamnagar
  module Refiners
    # Base class for refiners. A refiner computes a hash of extra attributes
    # for an item and merges that hash into the item.
    #
    # Subclasses override #refinement_result; the base implementation
    # contributes an empty hash.
    class Refiner
      # Accepts an optional arguments hash, which the base class ignores.
      def initialize(args={})
      end

      # Human-readable name of this refiner.
      def to_s
        "Refiner"
      end

      # Merges #refinement_result for +item+ into +item+ (through
      # item.merge_refinement) and returns that same item.
      def refine(item)
        item.tap { |subject| subject.merge_refinement(refinement_result(subject)) }
      end

      # Attributes to merge into +item+. The base class adds nothing.
      def refinement_result(item)
        {}
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Jamnagar
  module Refiners
    # Refiner that attaches a "source" entry to an item, obtained from a store.
    class SourceDetail < Refiner
      # store: object responding to find_source(item, raw_source).
      # Defaults to nil, in which case #refinement_result cannot work.
      def initialize(store: nil)
        @store = store
      end

      # Human-readable name of this refiner.
      def to_s
        "Source Detail"
      end

      # Asks the store for the source matching item.raw_source and returns
      # it under the "source" key.
      def refinement_result(item)
        source = @store.find_source(item, item.raw_source)
        {"source" => source}
      end
    end
  end
end
@@ -0,0 +1,23 @@
module Jamnagar
  module Refiners
    # Refiner that resolves an item's "url" to its final destination.
    class UrlExpansion < Refiner
      # expander: object responding to expand(url); a
      # Jamnagar::Utilities::UrlExpander is built when none is given.
      def initialize(expander=nil)
        @expander = expander || Jamnagar::Utilities::UrlExpander.new
      end

      # Human-readable name of this refiner.
      def to_s
        "URL Expansion"
      end

      # Delegates unchanged to Refiner#refine.
      def refine(item)
        super(item)
      end

      # Expands item["url"] and maps the expander's 'final_url', 'body' and
      # 'final_url_host' entries onto the item's refinement keys.
      def refinement_result(item)
        expansion = @expander.expand(item["url"])

        {
          "final_url"      => expansion['final_url'],
          "final_url_body" => expansion['body'],
          "final_url_host" => expansion["final_url_host"]
        }
      end
    end
  end
end
@@ -0,0 +1,13 @@
module Jamnagar
  module Refiners
    # Refiner that replaces an item's "final_url" with the result of running
    # it through a stripper.
    class UtmStripping < Refiner
      # stripper: object responding to strip(url); a
      # Jamnagar::Utilities::UtmStripper is built when none is given.
      def initialize(stripper=nil)
        @stripper = stripper || Jamnagar::Utilities::UtmStripper.new
      end

      # Returns the stripped form of item["final_url"] under "final_url".
      def refinement_result(item)
        stripped = @stripper.strip(item["final_url"])
        {"final_url" => stripped}
      end
    end
  end
end
@@ -0,0 +1,34 @@
module Jamnagar
  module Storage
    # Thin store that forwards every operation to an adapter.
    #
    # With no adapter given, a plain Hash is used. A Hash supports #insert
    # and #get here; the find_* methods need an adapter that implements them.
    class BasicStore
      def initialize(adapter=nil)
        @adapter = adapter || {}
      end

      # Stores record.to_h under record["_id"]. Does nothing (returns nil)
      # when record is nil or false.
      def insert(record)
        @adapter.store(record["_id"], record.to_h) if record
      end

      # Looks a record up by key through the adapter's #[].
      def get(key)
        @adapter[key]
      end

      # The remaining methods hand +params+ to the adapter method of the
      # same name and return whatever it returns.

      def find_first(params)
        @adapter.find_first(params)
      end

      def find_one(params)
        @adapter.find_one(params)
      end

      def find(params)
        @adapter.find(params)
      end

      def find_and_modify(params)
        @adapter.find_and_modify(params)
      end
    end
  end
end
@@ -0,0 +1,35 @@
module Jamnagar
  module Storage
    # Store of contributors, each tracking the ids of the items it contributed.
    class ContributorStore < BasicStore
      def initialize(adapter=nil)
        @adapter = adapter || {}
      end

      # Returns the stored record for +contributor+ (matched on its "id").
      # An existing record gets item["_id"] appended to its contributions;
      # otherwise a new record is created with +item+ as its only contribution.
      def find_contributor(item, contributor)
        find_and_update_contributor(item, contributor) || create_contributor(item, contributor)
      end

      private

      # Fetches the record and, when present, registers +item+ against it.
      # Returns the fetched record (or nil).
      def find_and_update_contributor(item, contributor)
        @adapter.find_first({"_id" => contributor["id"]}).tap do |found|
          update_contributions(found, item) if found
        end
      end

      # Stores a new record, then reads it back using the value returned by
      # the adapter's #store as the "_id" to look up.
      def create_contributor(item, contributor)
        record = contributor.merge({
          "_id" => contributor["id"],
          "contributions" => [item["_id"]],
          "contributions_count" => 1
        })
        stored_id = @adapter.store(contributor["id"], record)
        @adapter.find_first({"_id" => stored_id})
      end

      # Appends item["_id"] to the record's contributions array (in place)
      # and writes the array and an incremented counter through the adapter.
      # Skipped when the record, its contributions, the item or its "_id" is missing.
      def update_contributions(contributor, item)
        updatable = contributor && contributor["contributions"] && item && item["_id"]
        return unless updatable

        contributions = contributor["contributions"] << item["_id"]
        @adapter.update(
          {"_id" => contributor["_id"]},
          "$set" => {"contributions" => contributions},
          "$inc" => {"contributions_count" => 1}
        )
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Jamnagar
  module Storage
    # Minimal key/value cache backed by a Hash held in this instance.
    class InMemoryCache
      def initialize
        @cache = Hash.new
      end

      # Returns the value stored under +key+, or nil when there is none.
      def get(key)
        @cache.fetch(key, nil)
      end

      # Stores +value+ under +key+ and returns +value+.
      def set(key, value)
        @cache.store(key, value)
      end
    end
  end
end
@@ -0,0 +1,21 @@
module Jamnagar
  module Storage
    # Store for items that refuses to insert anything lacking an "_id" key.
    class ItemStore < BasicStore
      # Raised by #insert when the item has no "_id" key.
      MissingPrimaryKeyException = Class.new(StandardError)

      # Inserts +item+ through BasicStore#insert.
      # Returns nil without inserting when +item+ is nil or false.
      # Raises MissingPrimaryKeyException when item.keys lacks "_id".
      def insert(item)
        return unless item
        raise MissingPrimaryKeyException unless has_primary_key?(item)
        super(item)
      end

      private

      # True only when +item+ is present and its keys include "_id".
      def has_primary_key?(item)
        item ? item.keys.include?("_id") : false
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Jamnagar
  module Storage
    # Store for refined items; every call is forwarded to the adapter.
    class RefinedItemStore
      # adapter: object responding to find(account_id, url) and insert(item).
      # NOTE(review): the default resolves Storage::PersistentStoreAdapter,
      # while the package lists persistent_store_adapter.rb under adapters/ —
      # confirm the constant exists in this namespace.
      def initialize(adapter=nil)
        adapter ||= Storage::PersistentStoreAdapter.new
        @adapter = adapter
      end

      # Returns whatever the adapter finds for the account/url pair.
      def duplicates_of(account_id, url)
        @adapter.find(account_id, url)
      end

      # Hands +item+ to the adapter's #insert.
      def insert(item)
        @adapter.insert(item)
      end
    end
  end
end
@@ -0,0 +1,35 @@
module Jamnagar
  module Storage
    # Store of sources, each tracking the ids of the items attributed to it.
    class SourceStore < BasicStore
      def initialize(adapter=nil)
        @adapter = adapter || {}
      end

      # Returns the stored record for +source+ (matched on its "id").
      # An existing record gets item["_id"] appended to its contributions;
      # otherwise a new record is created with +item+ as its only contribution.
      def find_source(item, source)
        find_and_update_source(item, source) || create_source(item, source)
      end

      private

      # Fetches the record and, when present, registers +item+ against it.
      # Returns the fetched record (or nil).
      def find_and_update_source(item, source)
        @adapter.find_first({"_id" => source["id"]}).tap do |found|
          update_contributions(found, item) if found
        end
      end

      # Stores a new record, then reads it back using the value returned by
      # the adapter's #store as the "_id" to look up.
      def create_source(item, source)
        record = source.merge({
          "_id" => source["id"],
          "contributions" => [item["_id"]],
          "contributions_count" => 1
        })
        stored_id = @adapter.store(source["id"], record)
        @adapter.find_first({"_id" => stored_id})
      end

      # Appends item["_id"] to the record's contributions array (in place)
      # and writes the array and an incremented counter through the adapter.
      # Skipped when the record, its contributions, the item or its "_id" is missing.
      def update_contributions(source, item)
        updatable = source && source["contributions"] && item && item["_id"]
        return unless updatable

        contributions = source["contributions"] << item["_id"]
        @adapter.update(
          {"_id" => source["_id"]},
          "$set" => {"contributions" => contributions},
          "$inc" => {"contributions_count" => 1}
        )
      end
    end
  end
end
@@ -0,0 +1,12 @@
module Jamnagar
  module Utilities
    # Looks for an already-stored record with a given final URL.
    class DuplicateDetector
      # store: object responding to find_first(params).
      def initialize(store)
        @store = store
      end

      # Returns the store's first match on "final_url" (whatever find_first
      # yields when nothing matches is passed through unchanged).
      def detect(final_url)
        criteria = {"final_url" => final_url}
        @store.find_first(criteria)
      end
    end
  end
end
@@ -0,0 +1,16 @@
module Jamnagar
  module Utilities
    module Twitter
      # Copies a handful of fields out of an item's "raw" payload.
      class MetaDataExtractor
        # Reads id_str, retweet_count, favorite_count and created_at from
        # item["raw"] and returns them under item-level key names.
        # item["raw"] must be present; a nil payload raises NoMethodError.
        def extract(item)
          tweet = item["raw"]
          {
            "twitter_id"       => tweet["id_str"],
            "retweet_count"    => tweet["retweet_count"],
            "favorite_count"   => tweet["favorite_count"],
            "tweet_created_at" => tweet["created_at"]
          }
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module Jamnagar
  module Utilities
    # Bumps the "popularity" counter of a stored record by one.
    class PopularityIncrementor
      # store: object responding to find_and_modify(params).
      def initialize(store)
        @store = store
      end

      # Asks the store to increment "popularity" on the record whose "_id"
      # is +id+; returns whatever the store returns.
      def increment(id)
        @store.find_and_modify(update_params(id))
      end

      private

      # Builds the :query / :update pair handed to find_and_modify.
      def update_params(id)
        selector = {"_id" => id}
        change   = {"$inc" => {"popularity" => 1}}
        {:query => selector, :update => change}
      end
    end
  end
end
@@ -0,0 +1,12 @@
module Jamnagar
  module Utilities
    # Runs a block in a forked child process and blocks until it finishes.
    class Runner
      # Forks, calls +block+ in the child, and waits for that child to exit.
      #
      # Waits on the pid returned by fork rather than on "any child", so an
      # unrelated child process that happens to exit first is not reaped in
      # its place.
      #
      # Returns the pid of the child that was waited for.
      def run(&block)
        child_pid = fork { block.call }
        Process.wait(child_pid)
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Jamnagar
  module Utilities
    # Logger stand-in that discards everything it is given.
    #
    # Implements the usual severity methods (debug, info, warn, error, fatal,
    # unknown) as no-ops so it can replace a real logger wherever one of
    # those is called. Every method returns nil; the message is optional so
    # block-style calls such as `logger.debug { "..." }` also work.
    class SilentLogger
      # Accepts and ignores whatever a real logger's constructor would take.
      def initialize(args=nil)
      end

      def debug(msg=nil)
      end

      def info(msg=nil)
      end

      def warn(msg=nil)
      end

      def error(msg=nil)
      end

      def fatal(msg=nil)
      end

      def unknown(msg=nil)
      end

      # Kept for backward compatibility with existing callers.
      def logger(msg=nil)
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'httparty'
2
+ require 'nokogiri'
3
+
module Jamnagar
  module Utilities
    # Follows a URL through its redirects and reports where it ended up,
    # remembering the answer per input URL in a cache.
    class UrlExpander
      # Upper bound on "escape the bad URI and try again" attempts per
      # #expand call, so a URI that stays invalid cannot recurse forever.
      MAX_BAD_URI_RETRIES = 3

      # client - object responding to get(url, options); defaults to HTTParty.
      # cache  - object responding to get(key) and set(key, value); defaults
      #          to a fresh Jamnagar::Storage::InMemoryCache.
      # parser - stored but not used by this class; defaults to Nokogiri.
      def initialize(client=nil,cache=nil,parser=nil)
        @client = client || HTTParty
        @cache = cache || Jamnagar::Storage::InMemoryCache.new
        @parser = parser || Nokogiri
      end

      # Returns the expansion for +url+, from the cache when present.
      #
      # A successful lookup yields a Hash with "final_url", "final_url_host"
      # and "body"; a network-level failure yields {"final_url" => url,
      # "body" => ""}. Two legacy paths return the bare +url+ String instead:
      # a cached empty string, and an invalid-URI error whose message is not
      # in the expected "bad URI(is not URI?): ..." form.
      def expand(url)
        resolve(url, MAX_BAD_URI_RETRIES)
      end

      private

      # Cache-aware lookup; +retries_left+ bounds bad-URI retries.
      def resolve(url, retries_left)
        cached = check_cache(url)
        return cached if cached

        expanded = lookup(url, retries_left)
        cache_expanded(url, expanded)
        expanded
      end

      # A cached empty string is reported as the URL itself.
      def check_cache(url)
        result = @cache.get(url)
        return url if result == ""
        result
      end

      def cache_expanded(key, value)
        @cache.set(key, value)
      end

      # Fetches +url+ (following up to 10 redirects) and describes the last
      # URI requested. When the client rejects a URI as invalid, the
      # offending URI is percent-escaped and resolved again, at most
      # +retries_left+ more times.
      def lookup(url, retries_left = MAX_BAD_URI_RETRIES)
        response = @client.get url, :limit => 10
        last_uri = response.request.last_uri
        {"final_url" => last_uri.to_s, "final_url_host" => last_uri.host.to_s, "body" => "null" }
      rescue URI::InvalidURIError => error
        bad_uri = bad_uri_from(error)
        return url if bad_uri.nil?
        return unexpanded(url) if retries_left <= 0
        resolve(escape(bad_uri), retries_left - 1)
      rescue OpenSSL::SSL::SSLError, Net::ReadTimeout, SocketError, EOFError, HTTParty::RedirectionTooDeep, Errno::ETIMEDOUT, Zlib::DataError
        unexpanded(url)
      end

      # Pulls the rejected URI out of an InvalidURIError message, or returns
      # nil when the message has a different shape. The URI is taken from the
      # message because it may be a redirect target rather than the URL that
      # was originally requested.
      def bad_uri_from(error)
        matches = error.message.match(/^bad\sURI\(is\snot\sURI\?\)\:\s(.*)$/)
        return nil if matches.nil?

        text = matches[1]
        # Some Ruby versions render the URI with #inspect; drop the quotes.
        quoted = text.length >= 2 && text.start_with?('"') && text.end_with?('"')
        quoted ? text[1..-2] : text
      end

      # Percent-escapes characters that are not allowed in a URI. Stands in
      # for URI.encode, which no longer exists in Ruby 3.0 and later.
      def escape(uri)
        @escaper ||= URI::RFC2396_Parser.new
        @escaper.escape(uri)
      end

      # Result used when the URL could not be followed.
      def unexpanded(url)
        {"final_url" => url, "body" => ""}
      end
    end
  end
end
@@ -0,0 +1,21 @@
require 'uri'

module Jamnagar
  module Utilities
    # Removes tracking parameters from a URL's query string.
    class UtmStripper
      # A query parameter is dropped when it starts with one of these.
      TRACKING_PREFIXES = %w[utm mbid hootPostID].freeze

      # Returns +url+ without its tracking parameters.
      #
      # Only the query string is touched: scheme, userinfo, host, port, path
      # and fragment are kept as parsed. A URL with no query string, or one
      # that cannot be parsed as a URI, is returned unchanged.
      def strip(url)
        uri = URI.parse(url)
        return url unless uri.query

        kept = uri.query.split('&').reject { |part| part.start_with?(*TRACKING_PREFIXES) }
        # A nil query drops the "?" entirely when nothing is left.
        uri.query = kept.empty? ? nil : kept.join('&')
        uri.to_s
      rescue URI::InvalidURIError
        url
      end
    end
  end
end
@@ -0,0 +1,16 @@
module Jamnagar
  module Verifiers
    module Twitter
      # Verifier that rejects items whose "twitter_id" is already stored.
      class UniquenessVerifier < Jamnagar::Verifiers::Verifier
        # store: object responding to find_first(params).
        def initialize(store: nil)
          @store = store
        end

        # Returns nil when the store already holds a record with the same
        # "twitter_id"; otherwise returns +item+ untouched.
        def accept_or_reject(item)
          duplicate = @store.find_first({"twitter_id" => item['twitter_id']})
          duplicate ? nil : item
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
module Jamnagar
  module Verifiers
    # Base verifier. #verify returns whatever #accept_or_reject decides;
    # the base decision is to accept, i.e. return the item as given.
    class Verifier
      # Entry point used by callers; defers to #accept_or_reject.
      def verify(item)
        verdict = accept_or_reject(item)
        verdict
      end

      # Hook for subclasses. The base class hands +item+ straight back.
      def accept_or_reject(item)
        item
      end
    end
  end
end
@@ -0,0 +1,3 @@
module Jamnagar
  # Version of the jamnagar gem.
  VERSION = '1.3.8'
end
data/lib/jamnagar.rb ADDED
@@ -0,0 +1,7 @@
1
+ require "jamnagar/version"
2
+ require 'require_all'
3
+ require_all File.join(File.dirname(__FILE__), 'jamnagar')
4
+
5
+ module Jamnagar
6
+ # Your code goes here...
7
+ end
data/run.rb ADDED
@@ -0,0 +1,49 @@
#!/usr/bin/env ruby
# Refines up to 50 URL-bearing tweets from tweet_stream.json through the
# jamnagar pipeline, storing results in MongoDB, and prints the elapsed time.
require 'json'
require 'logger'
require './lib/jamnagar'

# File.read closes the file once it has been read, so no handle is left open.
raw_items = JSON.parse(File.read("tweet_stream.json"))

# Keep only tweets that carry at least one URL, without duplicates.
with_urls = raw_items.select{|raw| raw["entities"]["urls"].size > 0 }.uniq

# Every item in a run is attributed to one randomly picked account id.
account_id = [44,55,66].sample
items = with_urls.map{|item| {"raw" => item, "created_at" => Time.now.to_i, "account_id" => account_id, "url" => item["entities"]["urls"][0]["expanded_url"]}}
items = items[0...50]

item_collection = Jamnagar::Initializers::Mongo.new({:database => "jamnagar", :collection => "items"}).collection
item_storage = Jamnagar::Storage::ItemStore.new(Jamnagar::Adapters::MongoAdapter.new(item_collection))

source_collection = Jamnagar::Initializers::Mongo.new({:database => "jamnagar", :collection => "sources"}).collection
source_storage = Jamnagar::Storage::SourceStore.new(Jamnagar::Adapters::MongoAdapter.new(source_collection))


logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

refiners = [
  Jamnagar::Refiners::PrimaryKeyGeneration.new,
  Jamnagar::Refiners::UrlExpansion.new,
  Jamnagar::Refiners::DuplicateDetection.new(store: item_storage),
  Jamnagar::Refiners::SourceDetail.new(store: source_storage),
  Jamnagar::Refiners::PopularityIncrementation.new(store: item_storage),
  Jamnagar::Refiners::MetaDataExtraction.new
]

verifiers = [
  Jamnagar::Verifiers::Twitter::UniquenessVerifier.new(store: item_storage)
]

refinery = Jamnagar::Refineries::ContentRefinery.new({
  :items => items,
  :refiners => refiners,
  :storage => item_storage,
  :verifiers => verifiers
})
s_time = Time.now
refinery.refine
e_time = Time.now

puts "\n------------------------\n"
puts "Refining took: #{(e_time - s_time).to_i} seconds"
puts "\n------------------------\n"
data/sentinal ADDED
@@ -0,0 +1,10 @@
1
+ #!/bin/bash
2
+
3
+ blinky-tape-test-tool xs
4
+ ./blinky_tests
5
+ /usr/local/bin/fswatch -o spec lib | xargs -n1 ./blinky_tests
6
+
7
+
8
+ # bundle exec rspec spec
9
+ # /usr/local/bin/fswatch -o spec lib | xargs -n1 bundle exec rspec spec
10
+
@@ -0,0 +1,53 @@
require 'spec_helper'

# Specs for BasicStore: adapter injection, nil handling on insert, and
# delegation of the lookup methods to the adapter.
describe Jamnagar::Storage::BasicStore do
  context 'adapters' do
    it 'should allow the use of a separate adapter' do
      adapter = Jamnagar::SpecHelpers::SimpleAdapter.new
      sut = Jamnagar::Storage::BasicStore.new(adapter)
    end
  end
  context "when an item is empty" do
    it 'should not store the item' do
      adapter = Jamnagar::SpecHelpers::SimpleAdapter.new
      sut = Jamnagar::Storage::BasicStore.new(adapter)
      # BasicStore#insert writes through adapter.store, so that is the
      # message that must not be sent for a nil record.
      expect(adapter).to_not receive(:store)
      sut.insert(nil)
    end
  end

  context 'retriving records' do
    it 'should get records using a primary key' do
      sut = Jamnagar::Storage::BasicStore.new
      sut.insert({"_id" => 1, "url" => "http://example.com"})
      sut.insert({"_id" => 2, "url" => "http://example.org"})

      expect(sut.get(2)).to eq({"_id" => 2, "url" => "http://example.org"})
    end
    context 'when looking for the first record' do
      it 'should ask the adapter' do
        adapter = double(Jamnagar::SpecHelpers::SimpleAdapter)
        sut = Jamnagar::Storage::BasicStore.new(adapter)
        expect(adapter).to receive(:find_first).with({"_id" => 123})
        sut.find_first({"_id" => 123})
      end
    end
    context 'when looking for multiple records' do
      it 'should ask the adapter' do
        adapter = double(Jamnagar::SpecHelpers::SimpleAdapter)
        sut = Jamnagar::Storage::BasicStore.new(adapter)
        expect(adapter).to receive(:find).with({"_id" => 123})
        sut.find({"_id" => 123})
      end
    end
  end
  context 'find and modify' do
    it 'should tell the adapter to find and modify the record' do
      adapter = double(Jamnagar::SpecHelpers::SimpleAdapter)
      sut = Jamnagar::Storage::BasicStore.new(adapter)
      expect(adapter).to receive(:find_and_modify).with({"foo" => "bar"})
      sut.find_and_modify({"foo" => "bar"})
    end
  end

end
@@ -0,0 +1,74 @@
require 'spec_helper'
require 'logger'

# Specs for ContentRefinery: accepted options, conversion of raw hashes to
# items, storage of refined items, and the refiner / verifier passes.
describe 'Content Refinement' do
  before do
    # Every example that calls #refine injects this runner from the spec helpers.
    @runner = Jamnagar::SpecHelpers::PassThroughRunner.new
  end
  it 'should take an options hash' do
    sut = Jamnagar::Refineries::ContentRefinery.new({})
  end
  it 'should accept a group of unrefined items for refinement' do
    sut = Jamnagar::Refineries::ContentRefinery.new({:items => []})
  end
  it 'should accept a list of refiners' do
    sut = Jamnagar::Refineries::ContentRefinery.new({:items => [], :refiners => []})
  end
  it 'should accept some form of storage' do
    storage_tank = double(Jamnagar::Storage::ItemStore)
    sut = Jamnagar::Refineries::ContentRefinery.new({:items => [], :refiners => [], :storage => storage_tank})
  end
  describe 'Converting Raw Items to First Class Items' do
    it 'should convert incoming item hashes to first class items' do
      raw = {"url" => "http://example.com", "id" => 1}
      store = Jamnagar::SpecHelpers::SimpleItemStore.new
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [raw], :refiners => [], :storage => store, :runner => @runner})
      expect(Jamnagar::Materials::Item).to receive(:new).with(raw)
      sut.refine
    end
  end
  describe 'Storing refined items' do
    it 'should store the items after they are refined' do
      item = Jamnagar::Materials::Item.new({"id" => 1})
      storage_tank = double(Jamnagar::Storage::ItemStore)
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [], :storage => storage_tank, :runner => @runner})
      expect(storage_tank).to receive(:insert).with(item)
      sut.refine
    end
    context 'when something blows up' do
      it 'should log the result' do
        item = Jamnagar::Materials::Item.new({"_id" => 1})
        storage_tank = double(Jamnagar::Storage::ItemStore)
        logger = double(Logger, :debug => true)
        expect(logger).to receive(:error).with("Insert Error: _id => 1")
        sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [], :storage => storage_tank, :logger => logger, :runner => @runner})
        allow(storage_tank).to receive(:insert).and_raise(Jamnagar::Adapters::InsertError)
        sut.refine
      end
    end
  end
  describe 'Refining with refiners' do
    it 'should refine each item with each refiner present' do
      item = Jamnagar::Materials::Item.new
      refiner = double(Jamnagar::Refiners::Refiner)
      store = Jamnagar::SpecHelpers::SimpleItemStore.new
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [refiner], :storage => store, :runner => @runner})

      expect(refiner).to receive(:refine).with(item)

      sut.refine
    end
  end
  describe 'Checking Quality with Verifiers' do
    it 'should verify each item with each verifier present' do
      item = Jamnagar::Materials::Item.new
      verifier = double(Jamnagar::Verifiers::Verifier)
      store = Jamnagar::SpecHelpers::SimpleItemStore.new
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :verifiers => [verifier], :storage => store, :runner => @runner})

      expect(verifier).to receive(:verify).with(item)

      sut.refine
    end
  end
end
@@ -0,0 +1,26 @@
require 'spec_helper'

# Specs for the ContributorDetail refiner: it takes a store, asks it for the
# item's raw "user", and merges the answer into the item as "contributor".
describe Jamnagar::Refiners::ContributorDetail do
  let(:raw)   { {"user" => {"id" => 43550495}} }
  let(:item)  { Jamnagar::Materials::Item.new({"raw" => raw}) }
  let(:store) { double(Jamnagar::Storage::ContributorStore) }
  let(:sut)   { Jamnagar::Refiners::ContributorDetail.new(store: store) }

  it 'accepts a store' do
    Jamnagar::Refiners::ContributorDetail.new(store: {})
  end
  it 'should look for the user in the contributor store' do
    expect(store).to receive(:find_contributor).with(item, raw["user"])
    sut.refine(item)
  end
  it 'should return the details of the contributor' do
    allow(store).to receive(:find_contributor).and_return({"_id" => 999})

    expect(sut.refine(item).to_h).to eq({"raw" => {"user" => {"id" => 43550495}}, "contributor" => {"_id" => 999}})
  end
end