tripleloop 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
1
  module Tripleloop
2
2
  class DocumentProcessor
3
- attr_reader :document
3
+ attr_reader :document, :options
4
4
 
5
- def initialize(document)
5
+ def initialize(document, options={})
6
+ @options = options
6
7
  @document = Util.with_nested_fetch(document)
7
8
  end
8
9
 
@@ -21,9 +22,9 @@ module Tripleloop
21
22
  }]
22
23
  end
23
24
 
24
- def self.batch_process(documents)
25
+ def self.batch_process(documents, options={})
25
26
  documents.map { |doc|
26
- self.new(doc).extracted_statements
27
+ self.new(doc, options).extracted_statements
27
28
  }.reduce(Hash.new([])) { |accu, statements|
28
29
  accu.merge(statements) { |k, olds, news|
29
30
  olds.concat(news)
@@ -34,14 +35,25 @@ module Tripleloop
34
35
  private
35
36
  def extractor_instances
36
37
  extractors = self.class.instance_variable_get(:@extractors)
38
+ filtered_extractors = apply_filters(extractors)
37
39
 
38
- @extractor_instances ||= extractors.map { |ext, opts|
40
+ @extractor_instances ||= filtered_extractors.map { |ext, opts|
39
41
  klass = extractor_class(ext)
40
42
  context = get_context(opts[:context])
41
43
  klass.new(context)
42
44
  }
43
45
  end
44
46
 
47
+ def apply_filters(extractors)
48
+ if options[:only]
49
+ extractors.select { |k,_| Array(options[:only]).include?(k) }
50
+ elsif options[:except]
51
+ extractors.reject { |k, _| Array(options[:except]).include?(k) }
52
+ else
53
+ extractors
54
+ end
55
+ end
56
+
45
57
  def extractor_class(extractor)
46
58
  class_name = Tripleloop::Util::String.classify("#{extractor}_extractor")
47
59
  scope.const_get(class_name)
@@ -36,8 +36,10 @@ describe Tripleloop::DocumentProcessor do
36
36
  }
37
37
  }}
38
38
 
39
+ let(:options) {{}}
40
+
39
41
  describe "#extracted_statements" do
40
- subject { Example::SampleProcessor.new(document) }
42
+ subject { Example::SampleProcessor.new(document, options) }
41
43
 
42
44
  context "when some of the registered extractors cannot be found" do
43
45
  it "raises an ExtractorNotFound error" do
@@ -70,6 +72,28 @@ describe Tripleloop::DocumentProcessor do
70
72
  :extractor_2 => :extracted
71
73
  })
72
74
  end
75
+
76
+ context "and the :only option is present" do
77
+ let(:options) {{ :only => [:foo, :baz] }}
78
+
79
+ it "executes only the extractors specified" do
80
+ subject.extracted_statements.should eq({
81
+ :foo => [[:subject, "foo-value", :object]],
82
+ :baz => [[:subject, "baz a", :object],
83
+ [:subject, "baz b", :object]]
84
+ })
85
+ end
86
+ end
87
+
88
+ context "and the :except option is present" do
89
+ let(:options) {{ :except=> [:bar, :baz] }}
90
+
91
+ it "executes only the extractors specified" do
92
+ subject.extracted_statements.should eq({
93
+ :foo => [[:subject, "foo-value", :object]]
94
+ })
95
+ end
96
+ end
73
97
  end
74
98
  end
75
99
 
@@ -86,10 +110,10 @@ describe Tripleloop::DocumentProcessor do
86
110
  }
87
111
  }
88
112
 
89
- subject { Example::SampleProcessor.batch_process(documents) }
113
+ let(:options) {{}}
90
114
 
91
115
  it "returns a hash of combined statements, grouped by extractor name" do
92
- subject.should eq({
116
+ Example::SampleProcessor.batch_process(documents).should eq({
93
117
  :foo => [
94
118
  [:subject, "foo-value 0", :object],
95
119
  [:subject, "foo-value 1", :object],
@@ -110,5 +134,15 @@ describe Tripleloop::DocumentProcessor do
110
134
  ]
111
135
  })
112
136
  end
137
+
138
+ it "Accepts an options array and forwards it to the constructor" do
139
+ Example::SampleProcessor.batch_process(documents, {:only => :foo}).should eq({
140
+ :foo => [
141
+ [:subject, "foo-value 0", :object],
142
+ [:subject, "foo-value 1", :object],
143
+ [:subject, "foo-value 2", :object]
144
+ ],
145
+ })
146
+ end
113
147
  end
114
148
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tripleloop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-05 00:00:00.000000000 Z
12
+ date: 2013-03-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rdf