tripleloop 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
|
|
1
1
|
module Tripleloop
|
2
2
|
class DocumentProcessor
|
3
|
-
attr_reader :document
|
3
|
+
attr_reader :document, :options
|
4
4
|
|
5
|
-
def initialize(document)
|
5
|
+
def initialize(document, options={})
|
6
|
+
@options = options
|
6
7
|
@document = Util.with_nested_fetch(document)
|
7
8
|
end
|
8
9
|
|
@@ -21,9 +22,9 @@ module Tripleloop
|
|
21
22
|
}]
|
22
23
|
end
|
23
24
|
|
24
|
-
def self.batch_process(documents)
|
25
|
+
def self.batch_process(documents, options={})
|
25
26
|
documents.map { |doc|
|
26
|
-
self.new(doc).extracted_statements
|
27
|
+
self.new(doc, options).extracted_statements
|
27
28
|
}.reduce(Hash.new([])) { |accu, statements|
|
28
29
|
accu.merge(statements) { |k, olds, news|
|
29
30
|
olds.concat(news)
|
@@ -34,14 +35,25 @@ module Tripleloop
|
|
34
35
|
private
|
35
36
|
def extractor_instances
|
36
37
|
extractors = self.class.instance_variable_get(:@extractors)
|
38
|
+
filtered_extractors = apply_filters(extractors)
|
37
39
|
|
38
|
-
@extractor_instances ||= extractors.map { |ext, opts|
|
40
|
+
@extractor_instances ||= filtered_extractors.map { |ext, opts|
|
39
41
|
klass = extractor_class(ext)
|
40
42
|
context = get_context(opts[:context])
|
41
43
|
klass.new(context)
|
42
44
|
}
|
43
45
|
end
|
44
46
|
|
47
|
+
def apply_filters(extractors)
|
48
|
+
if options[:only]
|
49
|
+
extractors.select { |k,_| Array(options[:only]).include?(k) }
|
50
|
+
elsif options[:except]
|
51
|
+
extractors.reject { |k, _| Array(options[:except]).include?(k) }
|
52
|
+
else
|
53
|
+
extractors
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
45
57
|
def extractor_class(extractor)
|
46
58
|
class_name = Tripleloop::Util::String.classify("#{extractor}_extractor")
|
47
59
|
scope.const_get(class_name)
|
@@ -36,8 +36,10 @@ describe Tripleloop::DocumentProcessor do
|
|
36
36
|
}
|
37
37
|
}}
|
38
38
|
|
39
|
+
let(:options) {{}}
|
40
|
+
|
39
41
|
describe "#extracted_statements" do
|
40
|
-
subject { Example::SampleProcessor.new(document) }
|
42
|
+
subject { Example::SampleProcessor.new(document, options) }
|
41
43
|
|
42
44
|
context "when some of the registered extractors cannot be found" do
|
43
45
|
it "raises an ExtractorNotFound error" do
|
@@ -70,6 +72,28 @@ describe Tripleloop::DocumentProcessor do
|
|
70
72
|
:extractor_2 => :extracted
|
71
73
|
})
|
72
74
|
end
|
75
|
+
|
76
|
+
context "and the :only option is present" do
|
77
|
+
let(:options) {{ :only => [:foo, :baz] }}
|
78
|
+
|
79
|
+
it "executes only the extractors specified" do
|
80
|
+
subject.extracted_statements.should eq({
|
81
|
+
:foo => [[:subject, "foo-value", :object]],
|
82
|
+
:baz => [[:subject, "baz a", :object],
|
83
|
+
[:subject, "baz b", :object]]
|
84
|
+
})
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
context "and the :except option is present" do
|
89
|
+
let(:options) {{ :except=> [:bar, :baz] }}
|
90
|
+
|
91
|
+
it "executes only the extractors specified" do
|
92
|
+
subject.extracted_statements.should eq({
|
93
|
+
:foo => [[:subject, "foo-value", :object]]
|
94
|
+
})
|
95
|
+
end
|
96
|
+
end
|
73
97
|
end
|
74
98
|
end
|
75
99
|
|
@@ -86,10 +110,10 @@ describe Tripleloop::DocumentProcessor do
|
|
86
110
|
}
|
87
111
|
}
|
88
112
|
|
89
|
-
|
113
|
+
let(:options) {{}}
|
90
114
|
|
91
115
|
it "returns a hash of combined statements, grouped by extractor name" do
|
92
|
-
|
116
|
+
Example::SampleProcessor.batch_process(documents).should eq({
|
93
117
|
:foo => [
|
94
118
|
[:subject, "foo-value 0", :object],
|
95
119
|
[:subject, "foo-value 1", :object],
|
@@ -110,5 +134,15 @@ describe Tripleloop::DocumentProcessor do
|
|
110
134
|
]
|
111
135
|
})
|
112
136
|
end
|
137
|
+
|
138
|
+
it "Accepts an options array and forwards it to the constructor" do
|
139
|
+
Example::SampleProcessor.batch_process(documents, {:only => :foo}).should eq({
|
140
|
+
:foo => [
|
141
|
+
[:subject, "foo-value 0", :object],
|
142
|
+
[:subject, "foo-value 1", :object],
|
143
|
+
[:subject, "foo-value 2", :object]
|
144
|
+
],
|
145
|
+
})
|
146
|
+
end
|
113
147
|
end
|
114
148
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tripleloop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdf
|