extractula 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/extractula.rb +36 -21
  2. data/spec/extractula_spec.rb +45 -2
  3. metadata +1 -1
@@ -9,29 +9,44 @@ require 'extractula/extracted_content'
9
9
  require 'extractula/extractor'
10
10
 
11
11
  module Extractula
12
- VERSION = "0.0.8"
12
+ VERSION = "0.0.9"
13
13
 
14
14
  @extractors = []
15
15
 
16
- def self.add_extractor(extractor_class)
17
- @extractors << extractor_class
18
- end
19
-
20
- def self.remove_extractor(extractor_class)
21
- @extractors.delete extractor_class
22
- end
23
-
24
- def self.extract(url, html)
25
- parsed_url = Domainatrix.parse(url)
26
- parsed_html = Nokogiri::HTML(html)
27
- extractor = @extractors.detect {|e| e.can_extract? parsed_url, parsed_html} || Extractor
28
- extractor.new(parsed_url, parsed_html).extract
29
- end
30
-
31
- def self.custom_extractor(config = {})
32
- klass = Class.new(Extractula::Extractor)
33
- klass.include(Extractula::OEmbed) if config.delete(:oembed)
34
- config.each { |option, args| klass.__send__(option, *args) }
35
- klass
16
+ class << self
17
+
18
+ attr_reader :extractors, :last_extractor
19
+
20
+ def add_extractor(extractor_class)
21
+ @extractors << extractor_class
22
+ end
23
+
24
+ def remove_extractor(extractor_class)
25
+ @extractors.delete extractor_class
26
+ end
27
+
28
+ def extract(url, html)
29
+ parsed_url, parsed_html = Domainatrix.parse(url), Nokogiri::HTML(html)
30
+ extractor = select_extractor parsed_url, parsed_html
31
+ extractor.new(parsed_url, parsed_html).extract
32
+ end
33
+
34
+ def select_extractor url, html
35
+ @last_extractor = @extractors.detect {|e| e.can_extract? url, html} || Extractor
36
+ end
37
+
38
+ def custom_extractor(*args, &block)
39
+ config = args.last.is_a?(Hash) ? args.pop : {}
40
+ klass_name = args[0]
41
+ if block_given?
42
+ klass = Class.new Extractula::Extractor, &block
43
+ else
44
+ klass = Class.new Extractula::Extractor
45
+ klass.__send__ :include, Extractula::OEmbed if config.delete(:oembed)
46
+ config.each { |option, args| klass.__send__(option, *args) }
47
+ end
48
+ const_set klass_name, klass if klass_name
49
+ klass
50
+ end
36
51
  end
37
52
  end
@@ -12,7 +12,7 @@ describe "extractula" do
12
12
  end
13
13
  end
14
14
 
15
- Extractula.add_extractor custom_extractor
15
+ # Extractula.add_extractor custom_extractor
16
16
  content = Extractula.extract("http://pauldix.net", "some html")
17
17
  content.url.should == "custom extractor url"
18
18
  content.summary.should == "my custom extractor"
@@ -30,7 +30,7 @@ describe "extractula" do
30
30
  end
31
31
  end
32
32
 
33
- Extractula.add_extractor custom_extractor
33
+ # Extractula.add_extractor custom_extractor
34
34
  content = Extractula.extract("http://pauldix.net", "some html")
35
35
  content.url.should_not == "this url"
36
36
  content.summary.should_not == "this summary"
@@ -42,4 +42,47 @@ describe "extractula" do
42
42
  result.should be_a Extractula::ExtractedContent
43
43
  result.url.should == "http://pauldix.net"
44
44
  end
45
+
46
+ it "saves a reference to the last extractor used" do
47
+ custom_extractor = Class.new(Extractula::Extractor) do
48
+ def self.can_extract? url, html
49
+ true
50
+ end
51
+ end
52
+ Extractula.extract "http://pauldix.net", "some html"
53
+ Extractula.last_extractor.should == custom_extractor
54
+ Extractula.remove_extractor custom_extractor
55
+ end
56
+
57
+ describe "defining an inline custom extractor" do
58
+ it "takes a block form definition" do
59
+ klass = Extractula.custom_extractor do
60
+ domain 'pauldix'
61
+ content_path '#content'
62
+ end
63
+ Extractula.extractors.should include(klass)
64
+ Extractula.remove_extractor klass
65
+ end
66
+
67
+ it "takes a hash form definition" do
68
+ klass = Extractula.custom_extractor :domain => 'pauldix', :content_path => '#content'
69
+ Extractula.extractors.should include(klass)
70
+ Extractula.remove_extractor klass
71
+ end
72
+
73
+ it "can be named" do
74
+ klass = Extractula.custom_extractor :PaulDix do
75
+ domain 'pauldix'
76
+ content_path '#content'
77
+ end
78
+ Extractula.const_defined?(:PaulDix).should be_true
79
+ Extractula.remove_extractor klass
80
+ end
81
+
82
+ it "can contain the OEmbed module" do
83
+ klass = Extractula.custom_extractor :oembed => true
84
+ klass.should include(Extractula::OEmbed)
85
+ Extractula.remove_extractor klass
86
+ end
87
+ end
45
88
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extractula
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paul Dix