jamnagar 1.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +3 -0
- data/.vagrant/machines/default/virtualbox/action_provision +1 -0
- data/.vagrant/machines/default/virtualbox/action_set_name +1 -0
- data/.vagrant/machines/default/virtualbox/id +1 -0
- data/.vagrant/machines/default/virtualbox/index_uuid +1 -0
- data/.vagrant/machines/default/virtualbox/private_key +27 -0
- data/.vagrant/machines/default/virtualbox/synced_folders +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +64 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +2 -0
- data/UTF-8-test.txt +0 -0
- data/Vagrantfile +67 -0
- data/blinky_tests +3 -0
- data/bootstrap.sh +36 -0
- data/jamnagar.gemspec +33 -0
- data/lib/jamnagar/adapters/adapter.rb +18 -0
- data/lib/jamnagar/adapters/file_system_adapter.rb +9 -0
- data/lib/jamnagar/adapters/mongo_adapter.rb +54 -0
- data/lib/jamnagar/adapters/persistent_store_adapter.rb +9 -0
- data/lib/jamnagar/initializers/mongo.rb +6 -0
- data/lib/jamnagar/materials/item.rb +15 -0
- data/lib/jamnagar/materials/ore.rb +29 -0
- data/lib/jamnagar/producers/producer.rb +5 -0
- data/lib/jamnagar/producers/rss_producer.rb +6 -0
- data/lib/jamnagar/producers/twitter_producer.rb +6 -0
- data/lib/jamnagar/refineries/content_refinery.rb +46 -0
- data/lib/jamnagar/refiners/contributor_detail.rb +17 -0
- data/lib/jamnagar/refiners/duplicate_detection.rb +23 -0
- data/lib/jamnagar/refiners/meta_data_extraction.rb +15 -0
- data/lib/jamnagar/refiners/popularity_incrementation.rb +22 -0
- data/lib/jamnagar/refiners/primary_key_generation.rb +20 -0
- data/lib/jamnagar/refiners/refiner.rb +22 -0
- data/lib/jamnagar/refiners/source_detail.rb +17 -0
- data/lib/jamnagar/refiners/url_expansion.rb +23 -0
- data/lib/jamnagar/refiners/utm_stripping.rb +13 -0
- data/lib/jamnagar/storage/basic_store.rb +34 -0
- data/lib/jamnagar/storage/contributor_store.rb +35 -0
- data/lib/jamnagar/storage/in_memory_cache.rb +17 -0
- data/lib/jamnagar/storage/item_store.rb +21 -0
- data/lib/jamnagar/storage/refined_item_store.rb +17 -0
- data/lib/jamnagar/storage/source_store.rb +35 -0
- data/lib/jamnagar/utilities/duplicate_detector.rb +12 -0
- data/lib/jamnagar/utilities/meta_data_extractor.rb +16 -0
- data/lib/jamnagar/utilities/popularity_incrementor.rb +20 -0
- data/lib/jamnagar/utilities/runner.rb +12 -0
- data/lib/jamnagar/utilities/silent_logger.rb +17 -0
- data/lib/jamnagar/utilities/url_expander.rb +47 -0
- data/lib/jamnagar/utilities/utm_stripper.rb +21 -0
- data/lib/jamnagar/verifiers/uniqueness_verifier.rb +16 -0
- data/lib/jamnagar/verifiers/verifier.rb +13 -0
- data/lib/jamnagar/version.rb +3 -0
- data/lib/jamnagar.rb +7 -0
- data/run.rb +49 -0
- data/sentinal +10 -0
- data/spec/basic_store_spec.rb +53 -0
- data/spec/content_refinement_spec.rb +74 -0
- data/spec/contributor_detail_refinment_spec.rb +26 -0
- data/spec/contributor_store_spec.rb +31 -0
- data/spec/duplicate_detector_spec.rb +26 -0
- data/spec/helpers.rb +92 -0
- data/spec/item_spec.rb +9 -0
- data/spec/item_store_spec.rb +18 -0
- data/spec/mongo_adapter_spec.rb +18 -0
- data/spec/popularity_incrementor_spec.rb +23 -0
- data/spec/producers_spec.rb +9 -0
- data/spec/refined_item_store_spec.rb +29 -0
- data/spec/refinements_spec.rb +118 -0
- data/spec/runner_spec.rb +8 -0
- data/spec/scenarios_spec.rb +4 -0
- data/spec/source_detail_refinment_spec.rb +24 -0
- data/spec/source_store_spec.rb +31 -0
- data/spec/spec_helper.rb +98 -0
- data/spec/url_expander_spec.rb +46 -0
- data/spec/utm_stripper_spec.rb +31 -0
- data/spec/utm_stripping_spec.rb +5 -0
- data/spec/verifications_spec.rb +22 -0
- data/tracer.rb +61 -0
- data/tweet_stream.json +1 -0
- metadata +288 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Base class for refiners. A refiner computes a hash of attributes for an
    # item (#refinement_result) and merges it into that item (#refine).
    # Subclasses are expected to override #refinement_result.
    class Refiner
      # @param args [Hash] accepted for interface compatibility; not used here
      def initialize(args={})
      end

      # @return [String] display name of this refiner
      def to_s
        "Refiner"
      end

      # Merges this refiner's result into +item+ through item.merge_refinement.
      #
      # @param item [#merge_refinement] the item being refined
      # @return the very same +item+ object that was passed in
      def refine(item)
        item.tap { |refined| refined.merge_refinement(refinement_result(refined)) }
      end

      # @param item the item being refined (ignored by the base class)
      # @return [Hash] always empty in the base class
      def refinement_result(item)
        {}
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that looks up an item's source in a store and exposes it under
    # the "source" key of the refinement result.
    class SourceDetail < Refiner
      # @param store [#find_source] store queried in #refinement_result
      def initialize(store: nil)
        @store = store
      end

      # @return [String] display name of this refiner
      def to_s
        "Source Detail"
      end

      # @param item [#raw_source] the item being refined
      # @return [Hash] {"source" => whatever the store returned}
      def refinement_result(item)
        source = @store.find_source(item, item.raw_source)
        {"source" => source}
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that expands an item's "url" and records the final URL, its
    # body and its host.
    class UrlExpansion < Refiner
      # @param expander [#expand, nil] defaults to a new
      #   Jamnagar::Utilities::UrlExpander when nothing is given
      def initialize(expander=nil)
        expander ||= Jamnagar::Utilities::UrlExpander.new
        @expander = expander
      end

      # @return [String] display name of this refiner
      def to_s
        "URL Expansion"
      end

      # Delegates straight to the parent implementation.
      def refine(item)
        super(item)
      end

      # @param item [#[]] must answer item["url"]
      # @return [Hash] with "final_url", "final_url_body" and "final_url_host"
      def refinement_result(item)
        expansion = @expander.expand(item["url"])

        {
          "final_url"      => expansion['final_url'],
          "final_url_body" => expansion['body'],
          "final_url_host" => expansion["final_url_host"]
        }
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Jamnagar
  module Refiners
    # Refiner that replaces an item's "final_url" with the stripper's output.
    class UtmStripping < Refiner
      # @param stripper [#strip, nil] defaults to a new
      #   Jamnagar::Utilities::UtmStripper when nothing is given
      def initialize(stripper=nil)
        stripper ||= Jamnagar::Utilities::UtmStripper.new
        @stripper = stripper
      end

      # @param item [#[]] must answer item["final_url"]
      # @return [Hash] {"final_url" => stripped url}
      def refinement_result(item)
        stripped = @stripper.strip(item["final_url"])
        {"final_url" => stripped}
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Jamnagar
  module Storage
    # Thin facade over a storage adapter. When no adapter is supplied a plain
    # Hash is used as the backing store. Every query method forwards its
    # argument to the adapter unchanged.
    class BasicStore
      # @param adapter [#store, #[], nil] backing adapter; a Hash by default
      def initialize(adapter=nil)
        @adapter = adapter || {}
      end

      # Stores +record+ (converted with #to_h) under its "_id".
      #
      # @return whatever the adapter's #store returns, or nil when +record+
      #   is nil/false (in which case nothing is written)
      def insert(record)
        @adapter.store(record["_id"], record.to_h) if record
      end

      # @return the adapter's entry for +key+
      def get(key)
        @adapter[key]
      end

      # Forwards to adapter.find_first.
      def find_first(params)
        @adapter.find_first(params)
      end

      # Forwards to adapter.find_one.
      def find_one(params)
        @adapter.find_one(params)
      end

      # Forwards to adapter.find.
      def find(params)
        @adapter.find(params)
      end

      # Forwards to adapter.find_and_modify.
      def find_and_modify(params)
        @adapter.find_and_modify(params)
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Jamnagar
  module Storage
    # Store of contributors. Each lookup also records the item as one of the
    # contributor's contributions, creating the contributor when missing.
    class ContributorStore < BasicStore
      # @param adapter [Object, nil] backing adapter; a Hash by default
      def initialize(adapter=nil)
        @adapter = adapter || {}
      end

      # Finds the contributor (registering +item+ as a contribution) or
      # creates it with +item+ as its first contribution.
      #
      # @param item [#[]] must answer item["_id"]
      # @param contributor [Hash] must carry an "id"
      # @return the contributor record as returned by the adapter
      def find_contributor(item, contributor)
        find_and_update_contributor(item, contributor) || create_contributor(item, contributor)
      end

      private

      # Looks the contributor up by "_id" and, when present, appends the item.
      def find_and_update_contributor(item, contributor)
        found = @adapter.find_first({"_id" => contributor["id"]})
        return found unless found

        update_contributions(found, item)
        found
      end

      # Stores a new contributor record, then reads it back using the value
      # returned by adapter.store as the "_id" to search for.
      def create_contributor(item, contributor)
        first_contribution = [item["_id"]]
        record = contributor.merge({"_id" => contributor["id"], "contributions" => first_contribution, "contributions_count" => 1})
        stored_key = @adapter.store(contributor["id"], record)
        @adapter.find_first({"_id" => stored_key})
      end

      # Appends the item's "_id" to the contributor's list (mutating that
      # list in place) and pushes the new list plus a counter increment to
      # the adapter. Does nothing when any required piece is missing.
      def update_contributions(contributor, item)
        return unless contributor && contributor["contributions"] && item && item["_id"]

        contributions = contributor["contributions"] << item["_id"]
        @adapter.update({"_id" => contributor["_id"]}, "$set" => {"contributions" => contributions}, "$inc" => {"contributions_count" => 1})
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Jamnagar
  module Storage
    # Item store that refuses to persist items lacking an "_id" key.
    class ItemStore < BasicStore

      # Raised by #insert when the item has no "_id" key.
      class MissingPrimaryKeyException < StandardError; end

      # @param item [#keys, nil] the item to persist
      # @return nil when +item+ is nil/false, otherwise the parent's result
      # @raise [MissingPrimaryKeyException] when the item has no "_id" key
      def insert(item)
        return unless item
        raise MissingPrimaryKeyException unless has_primary_key?(item)

        super(item)
      end

      private

      # @return [Boolean] true only for a present item whose keys include "_id"
      def has_primary_key?(item)
        item ? item.keys.include?("_id") : false
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Jamnagar
  module Storage
    # Store for refined items; both operations are forwarded to the adapter.
    class RefinedItemStore
      # @param adapter [#find, #insert, nil] defaults to a new
      #   Storage::PersistentStoreAdapter when nothing is given
      def initialize(adapter=nil)
        adapter ||= Storage::PersistentStoreAdapter.new
        @adapter = adapter
      end

      # @return whatever adapter.find(account_id, url) returns
      def duplicates_of(account_id, url)
        @adapter.find(account_id, url)
      end

      # @return whatever adapter.insert(item) returns
      def insert(item)
        @adapter.insert(item)
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Jamnagar
  module Storage
    # Store of sources. Each lookup also records the item as one of the
    # source's contributions, creating the source when missing.
    class SourceStore < BasicStore
      # @param adapter [Object, nil] backing adapter; a Hash by default
      def initialize(adapter=nil)
        @adapter = adapter || {}
      end

      # Finds the source (registering +item+ as a contribution) or creates it
      # with +item+ as its first contribution.
      #
      # @param item [#[]] must answer item["_id"]
      # @param source [Hash] must carry an "id"
      # @return the source record as returned by the adapter
      def find_source(item, source)
        find_and_update_source(item, source) || create_source(item, source)
      end

      private

      # Looks the source up by "_id" and, when present, appends the item.
      def find_and_update_source(item, source)
        found = @adapter.find_first({"_id" => source["id"]})
        return found unless found

        update_contributions(found, item)
        found
      end

      # Stores a new source record, then reads it back using the value
      # returned by adapter.store as the "_id" to search for.
      def create_source(item, source)
        first_contribution = [item["_id"]]
        record = source.merge({"_id" => source["id"], "contributions" => first_contribution, "contributions_count" => 1})
        stored_key = @adapter.store(source["id"], record)
        @adapter.find_first({"_id" => stored_key})
      end

      # Appends the item's "_id" to the source's list (mutating that list in
      # place) and pushes the new list plus a counter increment to the
      # adapter. Does nothing when any required piece is missing.
      def update_contributions(source, item)
        return unless source && source["contributions"] && item && item["_id"]

        contributions = source["contributions"] << item["_id"]
        @adapter.update({"_id" => source["_id"]}, "$set" => {"contributions" => contributions}, "$inc" => {"contributions_count" => 1})
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Jamnagar
  module Utilities
    module Twitter
      # Copies a fixed set of fields out of an item's "raw" payload.
      class MetaDataExtractor
        # Result key => key read from item["raw"], in output order.
        RAW_FIELDS = {
          "twitter_id"       => "id_str",
          "retweet_count"    => "retweet_count",
          "favorite_count"   => "favorite_count",
          "tweet_created_at" => "created_at"
        }.freeze

        # @param item [#[]] must answer item["raw"] with something indexable
        # @return [Hash] "twitter_id", "retweet_count", "favorite_count" and
        #   "tweet_created_at" taken from the raw payload
        def extract(item)
          RAW_FIELDS.each_with_object({}) do |(target, source), meta|
            meta[target] = item["raw"][source]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Jamnagar
  module Utilities
    # Asks a store to increment the "popularity" counter of a record.
    class PopularityIncrementor
      # @param store [#find_and_modify] store that performs the update
      def initialize(store)
        @store = store
      end

      # Sends one find_and_modify request: select by "_id", "$inc" the
      # "popularity" field by 1.
      #
      # @param id the "_id" of the record to update
      # @return whatever store.find_and_modify returns
      def increment(id)
        @store.find_and_modify({
          :query  => {"_id" => id},
          :update => {"$inc" => {"popularity" => 1}}
        })
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'httparty'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module Jamnagar
  module Utilities
    # Resolves a URL to its final destination by issuing a GET through the
    # client and reading the last URI of the request, caching the outcome per
    # input URL.
    class UrlExpander
      # Matches the message of URI::InvalidURIError and captures the bad URI.
      BAD_URI_MESSAGE = /^bad\sURI\(is\snot\sURI\?\)\:\s(.*)$/

      # @param client [#get, nil] HTTP client; HTTParty by default
      # @param cache [#get, #set, nil] result cache; a new
      #   Jamnagar::Storage::InMemoryCache by default
      # @param parser [Object, nil] stored but not used by this class;
      #   Nokogiri by default
      def initialize(client=nil,cache=nil,parser=nil)
        @client = client || HTTParty
        @cache = cache || Jamnagar::Storage::InMemoryCache.new
        @parser = parser || Nokogiri
      end

      # Returns the cached expansion for +url+ when there is one; otherwise
      # looks it up and caches the result under +url+.
      #
      # @param url [String] the URL to expand
      # @return [Hash] with "final_url" and "body" (plus "final_url_host"
      #   after a successful request)
      def expand(url)
        cached = check_cache(url) if @cache
        return cached if cached

        lookup(url).tap { |expanded| cache_expanded(url, expanded) }
      end

      private

      # NOTE(review): an empty-string cache entry makes this return the bare
      # url String rather than a Hash, unlike every other path; left unchanged
      # because callers or specs outside this file may rely on it. Confirm.
      def check_cache(url)
        result = @cache.get(url)
        return url if result == ""
        result
      end

      def cache_expanded(key, value)
        @cache.set(key, value)
      end

      # Performs the request. An invalid URI is percent-escaped and expanded
      # again; network-level failures yield the unexpanded fallback.
      def lookup(url)
        response = @client.get url, :limit => 10
        {"final_url" => response.request.last_uri.to_s, "final_url_host" => response.request.last_uri.host.to_s, "body" => "null" }
      rescue URI::InvalidURIError => error
        matches = error.message.match(BAD_URI_MESSAGE)
        # Previously the bare url String was returned here, which broke
        # callers that index the result like a Hash.
        return unexpanded(url) if matches.nil?

        # Newer Rubies quote the offending URI in the message (via #inspect);
        # drop the surrounding quotes before escaping.
        bad_uri = matches[1].sub(/\A"(.*)"\z/m, '\1')
        # URI.encode was removed in Ruby 3.0; the RFC 2396 parser provides
        # the same escaping.
        good_uri = URI::RFC2396_Parser.new.escape(bad_uri)
        expand good_uri
      rescue OpenSSL::SSL::SSLError, Net::ReadTimeout, SocketError, EOFError, HTTParty::RedirectionTooDeep, Errno::ETIMEDOUT, Zlib::DataError
        unexpanded(url)
      end

      # Fallback result used when +url+ could not be expanded.
      def unexpanded(url)
        {"final_url" => url, "body" => ""}
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Jamnagar
  module Utilities
    # Removes tracking parameters from a URL's query string.
    class UtmStripper
      # A query part is dropped when it starts with any of these prefixes.
      TRACKING_PREFIXES = %w[utm mbid hootPostID].freeze

      # @param url [String] the URL to clean
      # @return [String] the URL without tracking parameters; the input is
      #   returned untouched when it has no query string or cannot be parsed
      def strip(url)
        uri = URI.parse url
        # Previously nil was returned here, wiping URLs that had no query.
        return url unless uri.query

        parts = uri.query.split('&').reject { |part| part.start_with?(*TRACKING_PREFIXES) }
        # Rewriting only the query keeps port, userinfo and fragment, which a
        # scheme/host/path rebuild would drop.
        uri.query = parts.empty? ? nil : parts.join('&')
        uri.to_s
      rescue URI::InvalidURIError
        url
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Jamnagar
  module Verifiers
    module Twitter
      # Rejects items whose "twitter_id" is already present in the store.
      class UniquenessVerifier < Jamnagar::Verifiers::Verifier
        # @param store [#find_first] store searched for an existing record
        def initialize(store: nil)
          @store = store
        end

        # @param item [#[]] must answer item['twitter_id']
        # @return the item when the store has no match, nil otherwise
        def accept_or_reject(item)
          duplicate = @store.find_first({"twitter_id" => item['twitter_id']})
          duplicate ? nil : item
        end
      end
    end
  end
end
|
data/lib/jamnagar.rb
ADDED
data/run.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env ruby
require 'json'
require 'logger'
require './lib/jamnagar'

# Demo driver: loads tweets from tweet_stream.json, keeps those carrying at
# least one URL, and runs the first 50 through a ContentRefinery backed by
# Mongo collections.

# Block form closes the file handle once the JSON has been read.
raw_items = File.open("tweet_stream.json") { |stream| JSON.load(stream) }

with_urls = raw_items.reject { |raw| raw["entities"]["urls"].empty? }.uniq

account_id = [44,55,66].sample
items = with_urls.map do |item|
  {
    "raw" => item,
    "created_at" => Time.now.to_i,
    "account_id" => account_id,
    "url" => item["entities"]["urls"][0]["expanded_url"]
  }
end
items = items[0...50]

item_collection = Jamnagar::Initializers::Mongo.new({:database => "jamnagar", :collection => "items"}).collection
item_storage = Jamnagar::Storage::ItemStore.new(Jamnagar::Adapters::MongoAdapter.new(item_collection))

source_collection = Jamnagar::Initializers::Mongo.new({:database => "jamnagar", :collection => "sources"}).collection
source_storage = Jamnagar::Storage::SourceStore.new(Jamnagar::Adapters::MongoAdapter.new(source_collection))

# NOTE(review): this logger is configured but never handed to the refinery
# below; confirm whether it was meant to be passed as :logger.
logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

refiners = [
  Jamnagar::Refiners::PrimaryKeyGeneration.new,
  Jamnagar::Refiners::UrlExpansion.new,
  Jamnagar::Refiners::DuplicateDetection.new(store: item_storage),
  Jamnagar::Refiners::SourceDetail.new(store: source_storage),
  Jamnagar::Refiners::PopularityIncrementation.new(store: item_storage),
  Jamnagar::Refiners::MetaDataExtraction.new
]

verifiers = [
  Jamnagar::Verifiers::Twitter::UniquenessVerifier.new(store: item_storage)
]

refinery = Jamnagar::Refineries::ContentRefinery.new({
  :items => items,
  :refiners => refiners,
  :storage => item_storage,
  :verifiers => verifiers
})

# Monotonic clock: the measured duration is unaffected by wall-clock changes.
s_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
refinery.refine
e_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

puts "\n------------------------\n"
puts "Refining took: #{(e_time - s_time).to_i} seconds"
puts "\n------------------------\n"
|
data/sentinal
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Jamnagar::Storage::BasicStore: adapter injection, ignoring empty
# records, key lookup and forwarding of query calls to the adapter.
describe Jamnagar::Storage::BasicStore do
  context 'adapters' do
    it 'should allow the use of a separate adapter' do
      adapter = Jamnagar::SpecHelpers::SimpleAdapter.new
      sut = Jamnagar::Storage::BasicStore.new(adapter)
    end
  end
  context "when an item is empty" do
    it 'should not store the item' do
      adapter = double(Jamnagar::SpecHelpers::SimpleAdapter)
      sut = Jamnagar::Storage::BasicStore.new(adapter)
      # BasicStore#insert writes through adapter.store, so that is the call
      # that must not happen; guarding :insert could never fail.
      expect(adapter).to_not receive(:store)
      sut.insert(nil)
    end
  end

  context 'retriving records' do
    it 'should get records using a primary key' do
      sut = Jamnagar::Storage::BasicStore.new
      sut.insert({"_id" => 1, "url" => "http://example.com"})
      sut.insert({"_id" => 2, "url" => "http://example.org"})

      expect(sut.get(2)).to eq({"_id" => 2, "url" => "http://example.org"})
    end
    context 'when looking for the first record' do
      it 'should ask the adapter' do
        adapter = double(Jamnagar::SpecHelpers::SimpleAdapter)
        sut = Jamnagar::Storage::BasicStore.new(adapter)
        expect(adapter).to receive(:find_first).with({"_id" => 123})
        sut.find_first({"_id" => 123})
      end
    end
    context 'when looking for multiple records' do
      it 'should ask the adapter' do
        adapter = double(Jamnagar::SpecHelpers::SimpleAdapter)
        sut = Jamnagar::Storage::BasicStore.new(adapter)
        expect(adapter).to receive(:find).with({"_id" => 123})
        sut.find({"_id" => 123})
      end
    end
  end
  context 'find and modify' do
    it 'should tell the adapter to find and modify the record' do
      adapter = double(Jamnagar::SpecHelpers::SimpleAdapter)
      sut = Jamnagar::Storage::BasicStore.new(adapter)
      expect(adapter).to receive(:find_and_modify).with({"foo" => "bar"})
      sut.find_and_modify({"foo" => "bar"})
    end
  end

end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# Specs for Jamnagar::Refineries::ContentRefinery: accepted options, item
# conversion, storage of refined items, and use of refiners and verifiers.
describe 'Content Refinement' do
  before do
    @runner = Jamnagar::SpecHelpers::PassThroughRunner.new
  end
  it 'should take an options hash' do
    sut = Jamnagar::Refineries::ContentRefinery.new({})
  end
  it 'should accept a group of unrefined items for refinement' do
    sut = Jamnagar::Refineries::ContentRefinery.new({:items => []})
  end
  it 'should accept a list of refiners' do
    sut = Jamnagar::Refineries::ContentRefinery.new({:items => [], :refiners => []})
  end
  it 'should accept some form of storage' do
    storage_tank = double(Jamnagar::Storage::ItemStore)
    sut = Jamnagar::Refineries::ContentRefinery.new({:items => [], :refiners => [], :storage => storage_tank})
  end
  describe 'Converting Raw Items to First Class Items' do
    it 'should convert incoming item hashes to first class items' do
      raw = {"url" => "http://example.com", "id" => 1}
      store = Jamnagar::SpecHelpers::SimpleItemStore.new
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [raw], :refiners => [], :storage => store, :runner => @runner})
      expect(Jamnagar::Materials::Item).to receive(:new).with(raw)
      sut.refine
    end
  end
  describe 'Storing refined items' do
    it 'should store the items after they are refined' do
      item = Jamnagar::Materials::Item.new({"id" => 1})
      storage_tank = double(Jamnagar::Storage::ItemStore)
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [], :storage => storage_tank, :runner => @runner})
      expect(storage_tank).to receive(:insert).with(item)
      sut.refine
    end
    context 'when something blows up' do
      it 'should log the result' do
        item = Jamnagar::Materials::Item.new({"_id" => 1})
        storage_tank = double(Jamnagar::Storage::ItemStore)
        logger = double(Logger, :debug => true)
        expect(logger).to receive(:error).with("Insert Error: _id => 1")
        sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [], :storage => storage_tank, :logger => logger, :runner => @runner})
        allow(storage_tank).to receive(:insert).and_raise(Jamnagar::Adapters::InsertError)
        sut.refine
      end
    end
  end
  describe 'Refining with refiners' do
    it 'should refine each item with each refiner present' do
      item = Jamnagar::Materials::Item.new
      refiner = double(Jamnagar::Refiners::Refiner)
      store = Jamnagar::SpecHelpers::SimpleItemStore.new
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [refiner], :storage => store, :runner => @runner})

      expect(refiner).to receive(:refine).with(item)

      sut.refine
    end
  end
  describe 'Checking Quality with Verifiers' do
    # Description used to repeat the refiner example's wording, although this
    # example exercises a verifier.
    it 'should verify each item with each verifier present' do
      item = Jamnagar::Materials::Item.new
      verifier = double(Jamnagar::Verifiers::Verifier)
      store = Jamnagar::SpecHelpers::SimpleItemStore.new
      sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :verifiers => [verifier], :storage => store, :runner => @runner})

      expect(verifier).to receive(:verify).with(item)

      sut.refine
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Jamnagar::Refiners::ContributorDetail: the refiner consults the
# contributor store and merges what it returns under "contributor".
describe Jamnagar::Refiners::ContributorDetail do
  it 'accepts a store' do
    Jamnagar::Refiners::ContributorDetail.new(store: {})
  end

  it 'should look for the user in the contributor store' do
    raw = {"user" => {"id" => 43550495}}
    item = Jamnagar::Materials::Item.new({"raw" => raw})
    store = double(Jamnagar::Storage::ContributorStore)
    sut = Jamnagar::Refiners::ContributorDetail.new(store: store)

    expect(store).to receive(:find_contributor).with(item, raw["user"])
    sut.refine(item)
  end

  it 'should return the details of the contributor' do
    raw = {"user" => {"id" => 43550495}}
    item = Jamnagar::Materials::Item.new({"raw" => raw})
    store = double(Jamnagar::Storage::ContributorStore)
    sut = Jamnagar::Refiners::ContributorDetail.new(store: store)
    allow(store).to receive(:find_contributor).and_return({"_id" => 999})

    refined = sut.refine(item).to_h
    expect(refined).to eq({"raw" => {"user" => {"id" => 43550495}}, "contributor" => {"_id" => 999}})
  end
end
|