tripleloop 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,9 @@
|
|
1
1
|
module Tripleloop
|
2
2
|
class DocumentProcessor
|
3
|
-
attr_reader :document
|
3
|
+
attr_reader :document, :options
|
4
4
|
|
5
|
-
def initialize(document)
|
5
|
+
def initialize(document, options={})
|
6
|
+
@options = options
|
6
7
|
@document = Util.with_nested_fetch(document)
|
7
8
|
end
|
8
9
|
|
@@ -21,9 +22,9 @@ module Tripleloop
|
|
21
22
|
}]
|
22
23
|
end
|
23
24
|
|
24
|
-
def self.batch_process(documents)
|
25
|
+
def self.batch_process(documents, options={})
|
25
26
|
documents.map { |doc|
|
26
|
-
self.new(doc).extracted_statements
|
27
|
+
self.new(doc, options).extracted_statements
|
27
28
|
}.reduce(Hash.new([])) { |accu, statements|
|
28
29
|
accu.merge(statements) { |k, olds, news|
|
29
30
|
olds.concat(news)
|
@@ -34,14 +35,25 @@ module Tripleloop
|
|
34
35
|
private
|
35
36
|
def extractor_instances
|
36
37
|
extractors = self.class.instance_variable_get(:@extractors)
|
38
|
+
filtered_extractors = apply_filters(extractors)
|
37
39
|
|
38
|
-
@extractor_instances ||=
|
40
|
+
@extractor_instances ||= filtered_extractors.map { |ext, opts|
|
39
41
|
klass = extractor_class(ext)
|
40
42
|
context = get_context(opts[:context])
|
41
43
|
klass.new(context)
|
42
44
|
}
|
43
45
|
end
|
44
46
|
|
47
|
+
def apply_filters(extractors)
|
48
|
+
if options[:only]
|
49
|
+
extractors.select { |k,_| Array(options[:only]).include?(k) }
|
50
|
+
elsif options[:except]
|
51
|
+
extractors.reject { |k, _| Array(options[:except]).include?(k) }
|
52
|
+
else
|
53
|
+
extractors
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
45
57
|
def extractor_class(extractor)
|
46
58
|
class_name = Tripleloop::Util::String.classify("#{extractor}_extractor")
|
47
59
|
scope.const_get(class_name)
|
@@ -36,8 +36,10 @@ describe Tripleloop::DocumentProcessor do
|
|
36
36
|
}
|
37
37
|
}}
|
38
38
|
|
39
|
+
let(:options) {{}}
|
40
|
+
|
39
41
|
describe "#extracted_statements" do
|
40
|
-
subject { Example::SampleProcessor.new(document) }
|
42
|
+
subject { Example::SampleProcessor.new(document, options) }
|
41
43
|
|
42
44
|
context "when some of the registered extractors cannot be found" do
|
43
45
|
it "raises an ExtractorNotFound error" do
|
@@ -70,6 +72,28 @@ describe Tripleloop::DocumentProcessor do
|
|
70
72
|
:extractor_2 => :extracted
|
71
73
|
})
|
72
74
|
end
|
75
|
+
|
76
|
+
context "and the :only option is present" do
|
77
|
+
let(:options) {{ :only => [:foo, :baz] }}
|
78
|
+
|
79
|
+
it "executes only the extractors specified" do
|
80
|
+
subject.extracted_statements.should eq({
|
81
|
+
:foo => [[:subject, "foo-value", :object]],
|
82
|
+
:baz => [[:subject, "baz a", :object],
|
83
|
+
[:subject, "baz b", :object]]
|
84
|
+
})
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
context "and the :except option is present" do
|
89
|
+
let(:options) {{ :except=> [:bar, :baz] }}
|
90
|
+
|
91
|
+
it "executes only the extractors specified" do
|
92
|
+
subject.extracted_statements.should eq({
|
93
|
+
:foo => [[:subject, "foo-value", :object]]
|
94
|
+
})
|
95
|
+
end
|
96
|
+
end
|
73
97
|
end
|
74
98
|
end
|
75
99
|
|
@@ -86,10 +110,10 @@ describe Tripleloop::DocumentProcessor do
|
|
86
110
|
}
|
87
111
|
}
|
88
112
|
|
89
|
-
|
113
|
+
let(:options) {{}}
|
90
114
|
|
91
115
|
it "returns a hash of combined statements, grouped by extractor name" do
|
92
|
-
|
116
|
+
Example::SampleProcessor.batch_process(documents).should eq({
|
93
117
|
:foo => [
|
94
118
|
[:subject, "foo-value 0", :object],
|
95
119
|
[:subject, "foo-value 1", :object],
|
@@ -110,5 +134,15 @@ describe Tripleloop::DocumentProcessor do
|
|
110
134
|
]
|
111
135
|
})
|
112
136
|
end
|
137
|
+
|
138
|
+
it "Accepts an options array and forwards it to the constructor" do
|
139
|
+
Example::SampleProcessor.batch_process(documents, {:only => :foo}).should eq({
|
140
|
+
:foo => [
|
141
|
+
[:subject, "foo-value 0", :object],
|
142
|
+
[:subject, "foo-value 1", :object],
|
143
|
+
[:subject, "foo-value 2", :object]
|
144
|
+
],
|
145
|
+
})
|
146
|
+
end
|
113
147
|
end
|
114
148
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tripleloop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rdf
|