jamnagar 1.3.8 → 1.3.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/jamnagar/materials/item.rb +14 -3
- data/lib/jamnagar/refineries/content_refinery.rb +1 -1
- data/lib/jamnagar/refiners/meta_data_extraction.rb +12 -1
- data/lib/jamnagar/utilities/meta_data_extractor.rb +7 -0
- data/lib/jamnagar/version.rb +1 -1
- data/spec/content_refinement_spec.rb +5 -5
- data/spec/contributor_detail_refinment_spec.rb +2 -2
- data/spec/item_spec.rb +2 -2
- data/spec/refined_item_store_spec.rb +1 -1
- data/spec/refinements_spec.rb +29 -17
- data/spec/source_detail_refinment_spec.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2391a4522a20d651afd97e7546ddc81e27afaa6d
|
4
|
+
data.tar.gz: 7d4804843765dfa17d99243f2cc5f0f950029334
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8acab6d866c0722507682644e5b598c0211071c46c87d757ce91bb93c66fac975f72cdf276c85bd011223dc3e9c3bc6c889ba6c3cacc58793abe10707796a64f
|
7
|
+
data.tar.gz: b479adf7afe323530225d1ab2b9ddb1264883ab5654f852fe27a398ed58644ffc0606812e3aee7c44773d521dc2631ef5e94266fdc644ad1cf852e489f49f438
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
jamnagar (1.3.
|
4
|
+
jamnagar (1.3.8)
|
5
5
|
httparty (~> 0.13)
|
6
6
|
moped (~> 2.0)
|
7
7
|
multi_json (~> 1.10)
|
@@ -14,7 +14,7 @@ GEM
|
|
14
14
|
blinky-tape-test-status (1.1.3)
|
15
15
|
serialport
|
16
16
|
bson (2.3.0)
|
17
|
-
connection_pool (2.
|
17
|
+
connection_pool (2.2.0)
|
18
18
|
diff-lcs (1.2.5)
|
19
19
|
docile (1.1.5)
|
20
20
|
httparty (0.13.3)
|
data/lib/jamnagar/materials/item.rb
CHANGED
@@ -3,13 +3,24 @@ require 'ostruct'
|
|
3
3
|
module Jamnagar
|
4
4
|
module Materials
|
5
5
|
class Item < Ore
|
6
|
-
def raw_contributor
|
7
|
-
to_h["raw"]["user"]
|
8
|
-
end
|
9
6
|
def raw_source
|
10
7
|
host = to_h['final_url_host'] || "nosource.jamnagar.co"
|
11
8
|
{"id" => host}
|
12
9
|
end
|
13
10
|
end
|
11
|
+
module Twitter
|
12
|
+
class Item < Jamnagar::Materials::Item
|
13
|
+
def raw_contributor
|
14
|
+
to_h["raw"]["user"]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
module RSS
|
19
|
+
class Item < Jamnagar::Materials::Item
|
20
|
+
def raw_contributor
|
21
|
+
to_h["raw"]["author"]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
14
25
|
end
|
15
26
|
end
|
data/lib/jamnagar/refineries/content_refinery.rb
CHANGED
@@ -38,7 +38,7 @@ module Jamnagar
|
|
38
38
|
end
|
39
39
|
|
40
40
|
def convert(item)
|
41
|
-
return Jamnagar::Materials::Item.new(item) unless item.is_a?(Jamnagar::Materials::Item)
|
41
|
+
return Jamnagar::Materials::Twitter::Item.new(item) unless item.is_a?(Jamnagar::Materials::Twitter::Item)
|
42
42
|
item
|
43
43
|
end
|
44
44
|
end
|
data/lib/jamnagar/refiners/meta_data_extraction.rb
CHANGED
@@ -1,9 +1,20 @@
|
|
1
1
|
module Jamnagar
|
2
2
|
module Refiners
|
3
|
+
module RSS
|
4
|
+
class MetaDataExtraction < Refiner
|
5
|
+
def initialize(extractor=nil)
|
6
|
+
@extractor = extractor || Jamnagar::Utilities::RSS::MetaDataExtractor.new
|
7
|
+
end
|
8
|
+
|
9
|
+
def refinement_result(item)
|
10
|
+
@extractor.extract(item)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
3
14
|
module Twitter
|
4
15
|
class MetaDataExtraction < Refiner
|
5
16
|
def initialize(extractor=nil)
|
6
|
-
@extractor = extractor || Jamnagar::Utilities::MetaDataExtractor.new
|
17
|
+
@extractor = extractor || Jamnagar::Utilities::Twitter::MetaDataExtractor.new
|
7
18
|
end
|
8
19
|
|
9
20
|
def refinement_result(item)
|
data/lib/jamnagar/version.rb
CHANGED
data/spec/content_refinement_spec.rb
CHANGED
@@ -23,13 +23,13 @@ describe 'Content Refinement' do
|
|
23
23
|
raw = {"url" => "http://example.com", "id" => 1}
|
24
24
|
store = Jamnagar::SpecHelpers::SimpleItemStore.new
|
25
25
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [raw], :refiners => [], :storage => store, :runner => @runner})
|
26
|
-
expect(Jamnagar::Materials::Item).to receive(:new).with(raw)
|
26
|
+
expect(Jamnagar::Materials::Twitter::Item).to receive(:new).with(raw)
|
27
27
|
sut.refine
|
28
28
|
end
|
29
29
|
end
|
30
30
|
describe 'Storing refined items' do
|
31
31
|
it 'should store the items after they are refined' do
|
32
|
-
item = Jamnagar::Materials::Item.new({"id" => 1})
|
32
|
+
item = Jamnagar::Materials::Twitter::Item.new({"id" => 1})
|
33
33
|
storage_tank = double(Jamnagar::Storage::ItemStore)
|
34
34
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [], :storage => storage_tank, :runner => @runner})
|
35
35
|
expect(storage_tank).to receive(:insert).with(item)
|
@@ -37,7 +37,7 @@ describe 'Content Refinement' do
|
|
37
37
|
end
|
38
38
|
context 'when something blows up' do
|
39
39
|
it 'should log the result' do
|
40
|
-
item = Jamnagar::Materials::Item.new({"_id" => 1})
|
40
|
+
item = Jamnagar::Materials::Twitter::Item.new({"_id" => 1})
|
41
41
|
storage_tank = double(Jamnagar::Storage::ItemStore)
|
42
42
|
logger = double(Logger, :debug => true)
|
43
43
|
expect(logger).to receive(:error).with("Insert Error: _id => 1")
|
@@ -49,7 +49,7 @@ describe 'Content Refinement' do
|
|
49
49
|
end
|
50
50
|
describe 'Refining with refiners' do
|
51
51
|
it 'should refine each item with each refiner present' do
|
52
|
-
item = Jamnagar::Materials::Item.new
|
52
|
+
item = Jamnagar::Materials::Twitter::Item.new
|
53
53
|
refiner = double(Jamnagar::Refiners::Refiner)
|
54
54
|
store = Jamnagar::SpecHelpers::SimpleItemStore.new
|
55
55
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [refiner], :storage => store, :runner => @runner})
|
@@ -61,7 +61,7 @@ describe 'Content Refinement' do
|
|
61
61
|
end
|
62
62
|
describe 'Checking Quality with Verifiers' do
|
63
63
|
it 'should refine each item with each refiner present' do
|
64
|
-
item = Jamnagar::Materials::Item.new
|
64
|
+
item = Jamnagar::Materials::Twitter::Item.new
|
65
65
|
verifier = double(Jamnagar::Verifiers::Verifier)
|
66
66
|
store = Jamnagar::SpecHelpers::SimpleItemStore.new
|
67
67
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :verifiers => [verifier], :storage => store, :runner => @runner})
|
data/spec/contributor_detail_refinment_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe Jamnagar::Refiners::ContributorDetail do
|
|
7
7
|
end
|
8
8
|
it 'should look for the user in the contributor store' do
|
9
9
|
@raw = {"user" => {"id" => 43550495}}
|
10
|
-
@item = Jamnagar::Materials::Item.new({"raw" => @raw})
|
10
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"raw" => @raw})
|
11
11
|
@store = double(Jamnagar::Storage::ContributorStore)
|
12
12
|
@sut = Jamnagar::Refiners::ContributorDetail.new(store: @store)
|
13
13
|
|
@@ -16,7 +16,7 @@ describe Jamnagar::Refiners::ContributorDetail do
|
|
16
16
|
end
|
17
17
|
it 'should return the details of the contributor' do
|
18
18
|
@raw = {"user" => {"id" => 43550495}}
|
19
|
-
@item = Jamnagar::Materials::Item.new({"raw" => @raw})
|
19
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"raw" => @raw})
|
20
20
|
@store = double(Jamnagar::Storage::ContributorStore)
|
21
21
|
@sut = Jamnagar::Refiners::ContributorDetail.new(store: @store)
|
22
22
|
allow(@store).to receive(:find_contributor).and_return({"_id" => 999})
|
data/spec/item_spec.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Jamnagar::Materials::Item do
|
3
|
+
describe Jamnagar::Materials::Twitter::Item do
|
4
4
|
it 'should be able to merge refinements' do
|
5
|
-
sut = Jamnagar::Materials::Item.new
|
5
|
+
sut = Jamnagar::Materials::Twitter::Item.new
|
6
6
|
sut.merge_refinement({:foo => 'bar'})
|
7
7
|
expect(sut.to_h).to eq({:foo => 'bar'})
|
8
8
|
end
|
data/spec/refined_item_store_spec.rb
CHANGED
@@ -19,7 +19,7 @@ describe 'Refined Item Store' do
|
|
19
19
|
sut.duplicates_of(1, "http://example.com")
|
20
20
|
end
|
21
21
|
it 'should return any results' do
|
22
|
-
items = [Jamnagar::Materials::Item.new, Jamnagar::Materials::Item.new]
|
22
|
+
items = [Jamnagar::Materials::Twitter::Item.new, Jamnagar::Materials::Twitter::Item.new]
|
23
23
|
adapter = double(Jamnagar::Adapters::MongoAdapter)
|
24
24
|
allow(adapter).to receive(:find).and_return(items)
|
25
25
|
sut = Jamnagar::Storage::RefinedItemStore.new(adapter)
|
data/spec/refinements_spec.rb
CHANGED
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe 'Refinements' do
|
4
4
|
describe 'Generic Refiner' do
|
5
5
|
it 'should tell the item to merge the refinements' do
|
6
|
-
item = double(Jamnagar::Materials::Item, :merge_refinement => {})
|
6
|
+
item = double(Jamnagar::Materials::Twitter::Item, :merge_refinement => {})
|
7
7
|
expect(item).to receive(:merge_refinement).with({})
|
8
8
|
sut = Jamnagar::Refiners::Refiner.new
|
9
9
|
sut.refine(item)
|
@@ -11,7 +11,7 @@ describe 'Refinements' do
|
|
11
11
|
end
|
12
12
|
describe 'Primary Key' do
|
13
13
|
it 'should tell the key generator to generate a primary key' do
|
14
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
14
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
15
15
|
digester = double(Digest::MD5)
|
16
16
|
|
17
17
|
sut = Jamnagar::Refiners::PrimaryKeyGeneration.new(digester)
|
@@ -19,7 +19,7 @@ describe 'Refinements' do
|
|
19
19
|
sut.refine(item)
|
20
20
|
end
|
21
21
|
it 'should return the generated key' do
|
22
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
22
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
23
23
|
digester = double(Digest::MD5)
|
24
24
|
|
25
25
|
sut = Jamnagar::Refiners::PrimaryKeyGeneration.new(digester)
|
@@ -29,7 +29,7 @@ describe 'Refinements' do
|
|
29
29
|
end
|
30
30
|
describe 'URL Expansion' do
|
31
31
|
it 'should tell the url expander to expand the shortened url' do
|
32
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
32
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
33
33
|
expander = double(Jamnagar::Utilities::UrlExpander)
|
34
34
|
|
35
35
|
sut = Jamnagar::Refiners::UrlExpansion.new(expander)
|
@@ -37,7 +37,7 @@ describe 'Refinements' do
|
|
37
37
|
sut.refine(item)
|
38
38
|
end
|
39
39
|
it 'should return details of the expanded url' do
|
40
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
40
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
41
41
|
expander = double(Jamnagar::Utilities::UrlExpander)
|
42
42
|
|
43
43
|
sut = Jamnagar::Refiners::UrlExpansion.new(expander)
|
@@ -47,7 +47,7 @@ describe 'Refinements' do
|
|
47
47
|
end
|
48
48
|
describe 'UTM Stripping' do
|
49
49
|
it 'should strip all utm related query string params' do
|
50
|
-
item = Jamnagar::Materials::Item.new({"final_url" => "http://bit.ly/123?utm_foo=123"})
|
50
|
+
item = Jamnagar::Materials::Twitter::Item.new({"final_url" => "http://bit.ly/123?utm_foo=123"})
|
51
51
|
stripper = double(Jamnagar::Utilities::UtmStripper)
|
52
52
|
|
53
53
|
sut = Jamnagar::Refiners::UtmStripping.new(stripper)
|
@@ -57,7 +57,7 @@ describe 'Refinements' do
|
|
57
57
|
end
|
58
58
|
describe 'Duplicates' do
|
59
59
|
it 'should tell the duplicate detector to detect duplicates' do
|
60
|
-
item = Jamnagar::Materials::Item.new({"final_url" => "http://bit.ly/123"})
|
60
|
+
item = Jamnagar::Materials::Twitter::Item.new({"final_url" => "http://bit.ly/123"})
|
61
61
|
detector = double(Jamnagar::Utilities::DuplicateDetector)
|
62
62
|
|
63
63
|
sut = Jamnagar::Refiners::DuplicateDetection.new(detector)
|
@@ -66,7 +66,7 @@ describe 'Refinements' do
|
|
66
66
|
end
|
67
67
|
context 'when no duplicates are found' do
|
68
68
|
it 'should not return duplicates' do
|
69
|
-
item = Jamnagar::Materials::Item.new()
|
69
|
+
item = Jamnagar::Materials::Twitter::Item.new()
|
70
70
|
detector = double(Jamnagar::Utilities::DuplicateDetector)
|
71
71
|
sut = Jamnagar::Refiners::DuplicateDetection.new(detector)
|
72
72
|
allow(detector).to receive(:detect).and_return(nil)
|
@@ -75,7 +75,7 @@ describe 'Refinements' do
|
|
75
75
|
end
|
76
76
|
context 'when duplicates are found' do
|
77
77
|
it 'should return duplicate details' do
|
78
|
-
item = Jamnagar::Materials::Item.new()
|
78
|
+
item = Jamnagar::Materials::Twitter::Item.new()
|
79
79
|
detector = double(Jamnagar::Utilities::DuplicateDetector)
|
80
80
|
sut = Jamnagar::Refiners::DuplicateDetection.new(detector)
|
81
81
|
allow(detector).to receive(:detect).and_return({"_id" => 456})
|
@@ -86,7 +86,7 @@ describe 'Refinements' do
|
|
86
86
|
describe 'Popularity' do
|
87
87
|
context 'when the item is not a duplicate' do
|
88
88
|
it 'should not increment any items' do
|
89
|
-
item = Jamnagar::Materials::Item.new({"duplicate" => false})
|
89
|
+
item = Jamnagar::Materials::Twitter::Item.new({"duplicate" => false})
|
90
90
|
incrementor = double(Jamnagar::Utilities::PopularityIncrementor)
|
91
91
|
|
92
92
|
sut = Jamnagar::Refiners::PopularityIncrementation.new(incrementor)
|
@@ -96,7 +96,7 @@ describe 'Refinements' do
|
|
96
96
|
end
|
97
97
|
context 'when the item is a duplicate' do
|
98
98
|
it 'should tell the popularity incrementer to increment the popularity of the original item' do
|
99
|
-
item = Jamnagar::Materials::Item.new({"duplicate" => true, "duplicate_of" => "123"})
|
99
|
+
item = Jamnagar::Materials::Twitter::Item.new({"duplicate" => true, "duplicate_of" => "123"})
|
100
100
|
incrementor = double(Jamnagar::Utilities::PopularityIncrementor)
|
101
101
|
|
102
102
|
sut = Jamnagar::Refiners::PopularityIncrementation.new(incrementor)
|
@@ -106,13 +106,25 @@ describe 'Refinements' do
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
describe 'Meta Data Extraction' do
|
109
|
-
|
110
|
-
|
111
|
-
|
109
|
+
context 'Twitter' do
|
110
|
+
it 'should tell the twitter extractor to extract meta data' do
|
111
|
+
item = Jamnagar::Materials::Twitter::Item.new({})
|
112
|
+
extractor = double(Jamnagar::Utilities::Twitter::MetaDataExtractor)
|
112
113
|
|
113
|
-
|
114
|
-
|
115
|
-
|
114
|
+
sut = Jamnagar::Refiners::Twitter::MetaDataExtraction.new(extractor)
|
115
|
+
expect(extractor).to receive(:extract).with(item)
|
116
|
+
sut.refine(item)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
context 'RSS' do
|
120
|
+
it 'should tell the rss extractor to extract meta data' do
|
121
|
+
item = Jamnagar::Materials::RSS::Item.new({})
|
122
|
+
extractor = double(Jamnagar::Utilities::RSS::MetaDataExtractor)
|
123
|
+
|
124
|
+
sut = Jamnagar::Refiners::RSS::MetaDataExtraction.new(extractor)
|
125
|
+
expect(extractor).to receive(:extract).with(item)
|
126
|
+
sut.refine(item)
|
127
|
+
end
|
116
128
|
end
|
117
129
|
end
|
118
130
|
end
|
data/spec/source_detail_refinment_spec.rb
CHANGED
@@ -6,7 +6,7 @@ describe Jamnagar::Refiners::SourceDetail do
|
|
6
6
|
Jamnagar::Refiners::SourceDetail.new(store: store)
|
7
7
|
end
|
8
8
|
it 'should look for the user in the source store' do
|
9
|
-
@item = Jamnagar::Materials::Item.new({"final_url_host" => "example.com"})
|
9
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"final_url_host" => "example.com"})
|
10
10
|
@store = double(Jamnagar::Storage::SourceStore)
|
11
11
|
@sut = Jamnagar::Refiners::SourceDetail.new(store: @store)
|
12
12
|
|
@@ -14,7 +14,7 @@ describe Jamnagar::Refiners::SourceDetail do
|
|
14
14
|
@sut.refine(@item)
|
15
15
|
end
|
16
16
|
it 'should return the details of the source' do
|
17
|
-
@item = Jamnagar::Materials::Item.new({"final_url_host" => "example.org"})
|
17
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"final_url_host" => "example.org"})
|
18
18
|
@store = double(Jamnagar::Storage::SourceStore)
|
19
19
|
@sut = Jamnagar::Refiners::SourceDetail.new(store: @store)
|
20
20
|
allow(@store).to receive(:find_source).and_return({"_id" => 999})
|