tripleloop 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,8 +1,9 @@
1
1
  module Tripleloop
2
2
  class DocumentProcessor
3
- attr_reader :document
3
+ attr_reader :document, :options
4
4
 
5
- def initialize(document)
5
+ def initialize(document, options={})
6
+ @options = options
6
7
  @document = Util.with_nested_fetch(document)
7
8
  end
8
9
 
@@ -21,9 +22,9 @@ module Tripleloop
21
22
  }]
22
23
  end
23
24
 
24
- def self.batch_process(documents)
25
+ def self.batch_process(documents, options={})
25
26
  documents.map { |doc|
26
- self.new(doc).extracted_statements
27
+ self.new(doc, options).extracted_statements
27
28
  }.reduce(Hash.new([])) { |accu, statements|
28
29
  accu.merge(statements) { |k, olds, news|
29
30
  olds.concat(news)
@@ -34,14 +35,25 @@ module Tripleloop
34
35
  private
35
36
  def extractor_instances
36
37
  extractors = self.class.instance_variable_get(:@extractors)
38
+ filtered_extractors = apply_filters(extractors)
37
39
 
38
- @extractor_instances ||= extractors.map { |ext, opts|
40
+ @extractor_instances ||= filtered_extractors.map { |ext, opts|
39
41
  klass = extractor_class(ext)
40
42
  context = get_context(opts[:context])
41
43
  klass.new(context)
42
44
  }
43
45
  end
44
46
 
47
+ def apply_filters(extractors)
48
+ if options[:only]
49
+ extractors.select { |k,_| Array(options[:only]).include?(k) }
50
+ elsif options[:except]
51
+ extractors.reject { |k, _| Array(options[:except]).include?(k) }
52
+ else
53
+ extractors
54
+ end
55
+ end
56
+
45
57
  def extractor_class(extractor)
46
58
  class_name = Tripleloop::Util::String.classify("#{extractor}_extractor")
47
59
  scope.const_get(class_name)
@@ -36,8 +36,10 @@ describe Tripleloop::DocumentProcessor do
36
36
  }
37
37
  }}
38
38
 
39
+ let(:options) {{}}
40
+
39
41
  describe "#extracted_statements" do
40
- subject { Example::SampleProcessor.new(document) }
42
+ subject { Example::SampleProcessor.new(document, options) }
41
43
 
42
44
  context "when some of the registered extractors cannot be found" do
43
45
  it "raises an ExtractorNotFound error" do
@@ -70,6 +72,28 @@ describe Tripleloop::DocumentProcessor do
70
72
  :extractor_2 => :extracted
71
73
  })
72
74
  end
75
+
76
+ context "and the :only option is present" do
77
+ let(:options) {{ :only => [:foo, :baz] }}
78
+
79
+ it "executes only the extractors specified" do
80
+ subject.extracted_statements.should eq({
81
+ :foo => [[:subject, "foo-value", :object]],
82
+ :baz => [[:subject, "baz a", :object],
83
+ [:subject, "baz b", :object]]
84
+ })
85
+ end
86
+ end
87
+
88
+ context "and the :except option is present" do
89
+ let(:options) {{ :except=> [:bar, :baz] }}
90
+
91
+ it "executes only the extractors specified" do
92
+ subject.extracted_statements.should eq({
93
+ :foo => [[:subject, "foo-value", :object]]
94
+ })
95
+ end
96
+ end
73
97
  end
74
98
  end
75
99
 
@@ -86,10 +110,10 @@ describe Tripleloop::DocumentProcessor do
86
110
  }
87
111
  }
88
112
 
89
- subject { Example::SampleProcessor.batch_process(documents) }
113
+ let(:options) {{}}
90
114
 
91
115
  it "returns a hash of combined statements, grouped by extractor name" do
92
- subject.should eq({
116
+ Example::SampleProcessor.batch_process(documents).should eq({
93
117
  :foo => [
94
118
  [:subject, "foo-value 0", :object],
95
119
  [:subject, "foo-value 1", :object],
@@ -110,5 +134,15 @@ describe Tripleloop::DocumentProcessor do
110
134
  ]
111
135
  })
112
136
  end
137
+
138
+ it "Accepts an options array and forwards it to the constructor" do
139
+ Example::SampleProcessor.batch_process(documents, {:only => :foo}).should eq({
140
+ :foo => [
141
+ [:subject, "foo-value 0", :object],
142
+ [:subject, "foo-value 1", :object],
143
+ [:subject, "foo-value 2", :object]
144
+ ],
145
+ })
146
+ end
113
147
  end
114
148
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tripleloop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-05 00:00:00.000000000 Z
12
+ date: 2013-03-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rdf