jamnagar 1.3.8 → 1.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/lib/jamnagar/materials/item.rb +14 -3
- data/lib/jamnagar/refineries/content_refinery.rb +1 -1
- data/lib/jamnagar/refiners/meta_data_extraction.rb +12 -1
- data/lib/jamnagar/utilities/meta_data_extractor.rb +7 -0
- data/lib/jamnagar/version.rb +1 -1
- data/spec/content_refinement_spec.rb +5 -5
- data/spec/contributor_detail_refinment_spec.rb +2 -2
- data/spec/item_spec.rb +2 -2
- data/spec/refined_item_store_spec.rb +1 -1
- data/spec/refinements_spec.rb +29 -17
- data/spec/source_detail_refinment_spec.rb +2 -2
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2391a4522a20d651afd97e7546ddc81e27afaa6d
|
4
|
+
data.tar.gz: 7d4804843765dfa17d99243f2cc5f0f950029334
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8acab6d866c0722507682644e5b598c0211071c46c87d757ce91bb93c66fac975f72cdf276c85bd011223dc3e9c3bc6c889ba6c3cacc58793abe10707796a64f
|
7
|
+
data.tar.gz: b479adf7afe323530225d1ab2b9ddb1264883ab5654f852fe27a398ed58644ffc0606812e3aee7c44773d521dc2631ef5e94266fdc644ad1cf852e489f49f438
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
jamnagar (1.3.
|
4
|
+
jamnagar (1.3.8)
|
5
5
|
httparty (~> 0.13)
|
6
6
|
moped (~> 2.0)
|
7
7
|
multi_json (~> 1.10)
|
@@ -14,7 +14,7 @@ GEM
|
|
14
14
|
blinky-tape-test-status (1.1.3)
|
15
15
|
serialport
|
16
16
|
bson (2.3.0)
|
17
|
-
connection_pool (2.
|
17
|
+
connection_pool (2.2.0)
|
18
18
|
diff-lcs (1.2.5)
|
19
19
|
docile (1.1.5)
|
20
20
|
httparty (0.13.3)
|
@@ -3,13 +3,24 @@ require 'ostruct'
|
|
3
3
|
module Jamnagar
|
4
4
|
module Materials
|
5
5
|
class Item < Ore
|
6
|
-
def raw_contributor
|
7
|
-
to_h["raw"]["user"]
|
8
|
-
end
|
9
6
|
def raw_source
|
10
7
|
host = to_h['final_url_host'] || "nosource.jamnagar.co"
|
11
8
|
{"id" => host}
|
12
9
|
end
|
13
10
|
end
|
11
|
+
module Twitter
|
12
|
+
class Item < Jamnagar::Materials::Item
|
13
|
+
def raw_contributor
|
14
|
+
to_h["raw"]["user"]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
module RSS
|
19
|
+
class Item < Jamnagar::Materials::Item
|
20
|
+
def raw_contributor
|
21
|
+
to_h["raw"]["author"]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
14
25
|
end
|
15
26
|
end
|
@@ -38,7 +38,7 @@ module Jamnagar
|
|
38
38
|
end
|
39
39
|
|
40
40
|
def convert(item)
|
41
|
-
return Jamnagar::Materials::Item.new(item) unless item.is_a?(Jamnagar::Materials::Item)
|
41
|
+
return Jamnagar::Materials::Twitter::Item.new(item) unless item.is_a?(Jamnagar::Materials::Twitter::Item)
|
42
42
|
item
|
43
43
|
end
|
44
44
|
end
|
@@ -1,9 +1,20 @@
|
|
1
1
|
module Jamnagar
|
2
2
|
module Refiners
|
3
|
+
module RSS
|
4
|
+
class MetaDataExtraction < Refiner
|
5
|
+
def initialize(extractor=nil)
|
6
|
+
@extractor = extractor || Jamnagar::Utilities::RSS::MetaDataExtractor.new
|
7
|
+
end
|
8
|
+
|
9
|
+
def refinement_result(item)
|
10
|
+
@extractor.extract(item)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
3
14
|
module Twitter
|
4
15
|
class MetaDataExtraction < Refiner
|
5
16
|
def initialize(extractor=nil)
|
6
|
-
@extractor = extractor || Jamnagar::Utilities::MetaDataExtractor.new
|
17
|
+
@extractor = extractor || Jamnagar::Utilities::Twitter::MetaDataExtractor.new
|
7
18
|
end
|
8
19
|
|
9
20
|
def refinement_result(item)
|
data/lib/jamnagar/version.rb
CHANGED
@@ -23,13 +23,13 @@ describe 'Content Refinement' do
|
|
23
23
|
raw = {"url" => "http://example.com", "id" => 1}
|
24
24
|
store = Jamnagar::SpecHelpers::SimpleItemStore.new
|
25
25
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [raw], :refiners => [], :storage => store, :runner => @runner})
|
26
|
-
expect(Jamnagar::Materials::Item).to receive(:new).with(raw)
|
26
|
+
expect(Jamnagar::Materials::Twitter::Item).to receive(:new).with(raw)
|
27
27
|
sut.refine
|
28
28
|
end
|
29
29
|
end
|
30
30
|
describe 'Storing refined items' do
|
31
31
|
it 'should store the items after they are refined' do
|
32
|
-
item = Jamnagar::Materials::Item.new({"id" => 1})
|
32
|
+
item = Jamnagar::Materials::Twitter::Item.new({"id" => 1})
|
33
33
|
storage_tank = double(Jamnagar::Storage::ItemStore)
|
34
34
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [], :storage => storage_tank, :runner => @runner})
|
35
35
|
expect(storage_tank).to receive(:insert).with(item)
|
@@ -37,7 +37,7 @@ describe 'Content Refinement' do
|
|
37
37
|
end
|
38
38
|
context 'when something blows up' do
|
39
39
|
it 'should log the result' do
|
40
|
-
item = Jamnagar::Materials::Item.new({"_id" => 1})
|
40
|
+
item = Jamnagar::Materials::Twitter::Item.new({"_id" => 1})
|
41
41
|
storage_tank = double(Jamnagar::Storage::ItemStore)
|
42
42
|
logger = double(Logger, :debug => true)
|
43
43
|
expect(logger).to receive(:error).with("Insert Error: _id => 1")
|
@@ -49,7 +49,7 @@ describe 'Content Refinement' do
|
|
49
49
|
end
|
50
50
|
describe 'Refining with refiners' do
|
51
51
|
it 'should refine each item with each refiner present' do
|
52
|
-
item = Jamnagar::Materials::Item.new
|
52
|
+
item = Jamnagar::Materials::Twitter::Item.new
|
53
53
|
refiner = double(Jamnagar::Refiners::Refiner)
|
54
54
|
store = Jamnagar::SpecHelpers::SimpleItemStore.new
|
55
55
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :refiners => [refiner], :storage => store, :runner => @runner})
|
@@ -61,7 +61,7 @@ describe 'Content Refinement' do
|
|
61
61
|
end
|
62
62
|
describe 'Checking Quality with Verifiers' do
|
63
63
|
it 'should refine each item with each refiner present' do
|
64
|
-
item = Jamnagar::Materials::Item.new
|
64
|
+
item = Jamnagar::Materials::Twitter::Item.new
|
65
65
|
verifier = double(Jamnagar::Verifiers::Verifier)
|
66
66
|
store = Jamnagar::SpecHelpers::SimpleItemStore.new
|
67
67
|
sut = Jamnagar::Refineries::ContentRefinery.new({:items => [item], :verifiers => [verifier], :storage => store, :runner => @runner})
|
@@ -7,7 +7,7 @@ describe Jamnagar::Refiners::ContributorDetail do
|
|
7
7
|
end
|
8
8
|
it 'should look for the user in the contributor store' do
|
9
9
|
@raw = {"user" => {"id" => 43550495}}
|
10
|
-
@item = Jamnagar::Materials::Item.new({"raw" => @raw})
|
10
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"raw" => @raw})
|
11
11
|
@store = double(Jamnagar::Storage::ContributorStore)
|
12
12
|
@sut = Jamnagar::Refiners::ContributorDetail.new(store: @store)
|
13
13
|
|
@@ -16,7 +16,7 @@ describe Jamnagar::Refiners::ContributorDetail do
|
|
16
16
|
end
|
17
17
|
it 'should return the details of the contributor' do
|
18
18
|
@raw = {"user" => {"id" => 43550495}}
|
19
|
-
@item = Jamnagar::Materials::Item.new({"raw" => @raw})
|
19
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"raw" => @raw})
|
20
20
|
@store = double(Jamnagar::Storage::ContributorStore)
|
21
21
|
@sut = Jamnagar::Refiners::ContributorDetail.new(store: @store)
|
22
22
|
allow(@store).to receive(:find_contributor).and_return({"_id" => 999})
|
data/spec/item_spec.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Jamnagar::Materials::Item do
|
3
|
+
describe Jamnagar::Materials::Twitter::Item do
|
4
4
|
it 'should be able to merge refinements' do
|
5
|
-
sut = Jamnagar::Materials::Item.new
|
5
|
+
sut = Jamnagar::Materials::Twitter::Item.new
|
6
6
|
sut.merge_refinement({:foo => 'bar'})
|
7
7
|
expect(sut.to_h).to eq({:foo => 'bar'})
|
8
8
|
end
|
@@ -19,7 +19,7 @@ describe 'Refined Item Store' do
|
|
19
19
|
sut.duplicates_of(1, "http://example.com")
|
20
20
|
end
|
21
21
|
it 'should return any results' do
|
22
|
-
items = [Jamnagar::Materials::Item.new, Jamnagar::Materials::Item.new]
|
22
|
+
items = [Jamnagar::Materials::Twitter::Item.new, Jamnagar::Materials::Twitter::Item.new]
|
23
23
|
adapter = double(Jamnagar::Adapters::MongoAdapter)
|
24
24
|
allow(adapter).to receive(:find).and_return(items)
|
25
25
|
sut = Jamnagar::Storage::RefinedItemStore.new(adapter)
|
data/spec/refinements_spec.rb
CHANGED
@@ -3,7 +3,7 @@ require 'spec_helper'
|
|
3
3
|
describe 'Refinements' do
|
4
4
|
describe 'Generic Refiner' do
|
5
5
|
it 'should tell the item to merge the refinements' do
|
6
|
-
item = double(Jamnagar::Materials::Item, :merge_refinement => {})
|
6
|
+
item = double(Jamnagar::Materials::Twitter::Item, :merge_refinement => {})
|
7
7
|
expect(item).to receive(:merge_refinement).with({})
|
8
8
|
sut = Jamnagar::Refiners::Refiner.new
|
9
9
|
sut.refine(item)
|
@@ -11,7 +11,7 @@ describe 'Refinements' do
|
|
11
11
|
end
|
12
12
|
describe 'Primary Key' do
|
13
13
|
it 'should tell the key generator to generate a primary key' do
|
14
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
14
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
15
15
|
digester = double(Digest::MD5)
|
16
16
|
|
17
17
|
sut = Jamnagar::Refiners::PrimaryKeyGeneration.new(digester)
|
@@ -19,7 +19,7 @@ describe 'Refinements' do
|
|
19
19
|
sut.refine(item)
|
20
20
|
end
|
21
21
|
it 'should return the generated key' do
|
22
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
22
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
23
23
|
digester = double(Digest::MD5)
|
24
24
|
|
25
25
|
sut = Jamnagar::Refiners::PrimaryKeyGeneration.new(digester)
|
@@ -29,7 +29,7 @@ describe 'Refinements' do
|
|
29
29
|
end
|
30
30
|
describe 'URL Expansion' do
|
31
31
|
it 'should tell the url expander to expand the shortened url' do
|
32
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
32
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
33
33
|
expander = double(Jamnagar::Utilities::UrlExpander)
|
34
34
|
|
35
35
|
sut = Jamnagar::Refiners::UrlExpansion.new(expander)
|
@@ -37,7 +37,7 @@ describe 'Refinements' do
|
|
37
37
|
sut.refine(item)
|
38
38
|
end
|
39
39
|
it 'should return details of the expanded url' do
|
40
|
-
item = Jamnagar::Materials::Item.new({"url" => "http://bit.ly/123"})
|
40
|
+
item = Jamnagar::Materials::Twitter::Item.new({"url" => "http://bit.ly/123"})
|
41
41
|
expander = double(Jamnagar::Utilities::UrlExpander)
|
42
42
|
|
43
43
|
sut = Jamnagar::Refiners::UrlExpansion.new(expander)
|
@@ -47,7 +47,7 @@ describe 'Refinements' do
|
|
47
47
|
end
|
48
48
|
describe 'UTM Stripping' do
|
49
49
|
it 'should strip all utm related query string params' do
|
50
|
-
item = Jamnagar::Materials::Item.new({"final_url" => "http://bit.ly/123?utm_foo=123"})
|
50
|
+
item = Jamnagar::Materials::Twitter::Item.new({"final_url" => "http://bit.ly/123?utm_foo=123"})
|
51
51
|
stripper = double(Jamnagar::Utilities::UtmStripper)
|
52
52
|
|
53
53
|
sut = Jamnagar::Refiners::UtmStripping.new(stripper)
|
@@ -57,7 +57,7 @@ describe 'Refinements' do
|
|
57
57
|
end
|
58
58
|
describe 'Duplicates' do
|
59
59
|
it 'should tell the duplicate detector to detect duplicates' do
|
60
|
-
item = Jamnagar::Materials::Item.new({"final_url" => "http://bit.ly/123"})
|
60
|
+
item = Jamnagar::Materials::Twitter::Item.new({"final_url" => "http://bit.ly/123"})
|
61
61
|
detector = double(Jamnagar::Utilities::DuplicateDetector)
|
62
62
|
|
63
63
|
sut = Jamnagar::Refiners::DuplicateDetection.new(detector)
|
@@ -66,7 +66,7 @@ describe 'Refinements' do
|
|
66
66
|
end
|
67
67
|
context 'when no duplicates are found' do
|
68
68
|
it 'should not return duplicates' do
|
69
|
-
item = Jamnagar::Materials::Item.new()
|
69
|
+
item = Jamnagar::Materials::Twitter::Item.new()
|
70
70
|
detector = double(Jamnagar::Utilities::DuplicateDetector)
|
71
71
|
sut = Jamnagar::Refiners::DuplicateDetection.new(detector)
|
72
72
|
allow(detector).to receive(:detect).and_return(nil)
|
@@ -75,7 +75,7 @@ describe 'Refinements' do
|
|
75
75
|
end
|
76
76
|
context 'when duplicates are found' do
|
77
77
|
it 'should return duplicate details' do
|
78
|
-
item = Jamnagar::Materials::Item.new()
|
78
|
+
item = Jamnagar::Materials::Twitter::Item.new()
|
79
79
|
detector = double(Jamnagar::Utilities::DuplicateDetector)
|
80
80
|
sut = Jamnagar::Refiners::DuplicateDetection.new(detector)
|
81
81
|
allow(detector).to receive(:detect).and_return({"_id" => 456})
|
@@ -86,7 +86,7 @@ describe 'Refinements' do
|
|
86
86
|
describe 'Popularity' do
|
87
87
|
context 'when the item is not a duplicate' do
|
88
88
|
it 'should not increment any items' do
|
89
|
-
item = Jamnagar::Materials::Item.new({"duplicate" => false})
|
89
|
+
item = Jamnagar::Materials::Twitter::Item.new({"duplicate" => false})
|
90
90
|
incrementor = double(Jamnagar::Utilities::PopularityIncrementor)
|
91
91
|
|
92
92
|
sut = Jamnagar::Refiners::PopularityIncrementation.new(incrementor)
|
@@ -96,7 +96,7 @@ describe 'Refinements' do
|
|
96
96
|
end
|
97
97
|
context 'when the item is a duplicate' do
|
98
98
|
it 'should tell the popularity incrementer to increment the popularity of the original item' do
|
99
|
-
item = Jamnagar::Materials::Item.new({"duplicate" => true, "duplicate_of" => "123"})
|
99
|
+
item = Jamnagar::Materials::Twitter::Item.new({"duplicate" => true, "duplicate_of" => "123"})
|
100
100
|
incrementor = double(Jamnagar::Utilities::PopularityIncrementor)
|
101
101
|
|
102
102
|
sut = Jamnagar::Refiners::PopularityIncrementation.new(incrementor)
|
@@ -106,13 +106,25 @@ describe 'Refinements' do
|
|
106
106
|
end
|
107
107
|
end
|
108
108
|
describe 'Meta Data Extraction' do
|
109
|
-
|
110
|
-
|
111
|
-
|
109
|
+
context 'Twitter' do
|
110
|
+
it 'should tell the twitter extractor to extract meta data' do
|
111
|
+
item = Jamnagar::Materials::Twitter::Item.new({})
|
112
|
+
extractor = double(Jamnagar::Utilities::Twitter::MetaDataExtractor)
|
112
113
|
|
113
|
-
|
114
|
-
|
115
|
-
|
114
|
+
sut = Jamnagar::Refiners::Twitter::MetaDataExtraction.new(extractor)
|
115
|
+
expect(extractor).to receive(:extract).with(item)
|
116
|
+
sut.refine(item)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
context 'RSS' do
|
120
|
+
it 'should tell the rss extractor to extract meta data' do
|
121
|
+
item = Jamnagar::Materials::RSS::Item.new({})
|
122
|
+
extractor = double(Jamnagar::Utilities::RSS::MetaDataExtractor)
|
123
|
+
|
124
|
+
sut = Jamnagar::Refiners::RSS::MetaDataExtraction.new(extractor)
|
125
|
+
expect(extractor).to receive(:extract).with(item)
|
126
|
+
sut.refine(item)
|
127
|
+
end
|
116
128
|
end
|
117
129
|
end
|
118
130
|
end
|
@@ -6,7 +6,7 @@ describe Jamnagar::Refiners::SourceDetail do
|
|
6
6
|
Jamnagar::Refiners::SourceDetail.new(store: store)
|
7
7
|
end
|
8
8
|
it 'should look for the user in the source store' do
|
9
|
-
@item = Jamnagar::Materials::Item.new({"final_url_host" => "example.com"})
|
9
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"final_url_host" => "example.com"})
|
10
10
|
@store = double(Jamnagar::Storage::SourceStore)
|
11
11
|
@sut = Jamnagar::Refiners::SourceDetail.new(store: @store)
|
12
12
|
|
@@ -14,7 +14,7 @@ describe Jamnagar::Refiners::SourceDetail do
|
|
14
14
|
@sut.refine(@item)
|
15
15
|
end
|
16
16
|
it 'should return the details of the source' do
|
17
|
-
@item = Jamnagar::Materials::Item.new({"final_url_host" => "example.org"})
|
17
|
+
@item = Jamnagar::Materials::Twitter::Item.new({"final_url_host" => "example.org"})
|
18
18
|
@store = double(Jamnagar::Storage::SourceStore)
|
19
19
|
@sut = Jamnagar::Refiners::SourceDetail.new(store: @store)
|
20
20
|
allow(@store).to receive(:find_source).and_return({"_id" => 999})
|