extractula 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile ADDED
File without changes
@@ -0,0 +1,68 @@
1
# A basic DOM-based extractor. It is the generic catch-all used when no
# custom extractor claims a document.
#
# The content heuristic looks at every <div>, <p> and <br> in the document,
# measures how much direct text the surrounding container holds, and returns
# the inner HTML of the first container (in document order) holding more than
# MIN_CONTENT_TEXT_SIZE characters.
class Extractula::DomExtractor
  # A container must hold more than this many characters of text to be
  # accepted as the main content.
  MIN_CONTENT_TEXT_SIZE = 140

  # Parses +html+ and returns an Extractula::ExtractedContent carrying the
  # given +url+ plus the extracted title and content.
  def extract(url, html)
    @doc = Nokogiri::HTML(html)
    # Forget the title memoized for a previously extracted document so a
    # reused extractor instance never reports a stale title.
    @title = nil
    Extractula::ExtractedContent.new :url => url, :title => title, :content => content
  end

  # Returns the stripped text of the first <title> element, or nil when the
  # document has no title (or nothing has been parsed yet).
  def title
    return @title if @title
    return nil unless @doc

    title_node = @doc.search("//title").first
    @title = title_node && title_node.text.strip
  end

  # Returns the stripped inner HTML of the first candidate container whose
  # text size exceeds MIN_CONTENT_TEXT_SIZE, or "" when none qualifies.
  def content
    best = candidate_nodes.detect { |candidate| candidate[:text_size] > MIN_CONTENT_TEXT_SIZE }
    best ? best[:parent].inner_html.strip : ""
    # Loofah.fragment(fragment).scrub!(:prune).to_s
  end

  # Placeholder; currently returns nil.
  def summary
  end

  # Sums the stripped size of the text nodes that sit directly inside every
  # child of +parent+ whose node name equals +node_type+.
  def calculate_children_text_size(parent, node_type)
    parent.children.inject(0) do |total, child|
      next total unless child.node_name == node_type

      child.children.inject(total) do |sum, grandchild|
        grandchild.node_name == "text" ? sum + grandchild.text.strip.size : sum
      end
    end
  end

  private

  # Builds the de-duplicated list of {:text_size, :parent} candidates in
  # document order.
  def candidate_nodes
    @doc.search("//div|//p|//br").collect { |node| candidate_for(node) }.compact.uniq
  end

  # Returns a candidate hash for +node+'s parent, or nil when the parent
  # holds no measurable text for this kind of node.
  def candidate_for(node)
    parent = node.parent
    text_size =
      case node.node_name
      when "div", "p" then calculate_children_text_size(parent, node.node_name)
      when "br"       then break_separated_text_size(node)
      else 0
      end

    text_size > 0 ? {:text_size => text_size, :parent => parent} : nil
  end

  # For a <br> sandwiched between two text nodes, returns the total stripped
  # size of the text nodes directly inside its parent; otherwise 0.
  def break_separated_text_size(br)
    before = br.previous
    after  = br.next
    # A <br> that is the first or last child has no sibling on that side;
    # treat it as "not between text" instead of calling a method on nil.
    return 0 unless before && after
    return 0 unless before.node_name == "text" && after.node_name == "text"

    br.parent.children.inject(0) do |sum, child|
      child.node_name == "text" ? sum + child.text.strip.size : sum
    end
  end
end
@@ -0,0 +1,32 @@
1
# Holds the result of an extraction: the url, title and content, plus derived
# attributes (summary, image urls, video embed) that are either supplied up
# front or computed lazily from the content HTML.
class Extractula::ExtractedContent
  attr_reader :url, :title, :content

  # Stores every key/value pair of +attributes+ as an instance variable, so
  # derived values such as :summary can be supplied instead of computed.
  def initialize(attributes = {})
    attributes.each_pair { |name, value| instance_variable_set("@#{name}", value) }
  end

  # Returns the supplied summary, or the first 350 characters of the content
  # text cut back to the last sentence terminator (? . ! ;) when one exists.
  def summary
    return @summary if @summary

    fragment = content_doc.inner_text.slice(0, 350)
    sentence_break = fragment.rindex(/[?.!;]/)
    @summary = sentence_break ? fragment.slice(0, sentence_break + 1) : fragment
  end

  # Returns the supplied image urls, or the src attribute of every <img> in
  # the content.
  def image_urls
    @image_urls ||= content_doc.search("//img").collect { |img| img["src"] }
  end

  # Returns the supplied embed code, or the HTML of the first <object> in the
  # content (nil when there is none). The result is cached like the other
  # derived attributes.
  def video_embed
    return @video_embed if @video_embed

    first_object = content_doc.search("//object").first
    @video_embed = first_object && first_object.to_html
  end

  private

  # Parses the content HTML once and reuses the document for all derived
  # attributes.
  def content_doc
    @content_doc ||= Nokogiri::HTML(@content)
  end
end
data/lib/extractula.rb ADDED
@@ -0,0 +1,24 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
2
+
3
+ module Extractula; end
4
+
5
+ require 'nokogiri'
6
+ require 'extractula/extracted_content'
7
+ require 'extractula/dom_extractor'
8
+
9
# Entry point of the library: keeps a registry of custom extractor classes
# and dispatches each document to the first one that claims it.
module Extractula
  # Registered extractor classes, consulted in insertion order.
  @extractors = []

  class << self
    # Registers +extractor_class+; it must respond to can_extract?(url, html)
    # and its instances to extract(url, html).
    def add_extractor(extractor_class)
      @extractors.push(extractor_class)
    end

    # Removes +extractor_class+ from the registry.
    def remove_extractor(extractor_class)
      @extractors.delete(extractor_class)
    end

    # Runs the first registered extractor whose can_extract? accepts the
    # document, falling back to DomExtractor when none does.
    def extract(url, html)
      chosen = @extractors.find { |candidate| candidate.can_extract?(url, html) }
      (chosen || DomExtractor).new.extract(url, html)
    end
  end
end
@@ -0,0 +1,109 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "dom extractor" do
4
+ it "returns an extracted content object with the url set" do
5
+ result = Extractula::DomExtractor.new.extract("http://pauldix.net", "")
6
+ result.should be_a Extractula::ExtractedContent
7
+ result.url.should == "http://pauldix.net"
8
+ end
9
+ end
10
+
11
+ describe "extraction cases" do
12
+ describe "extracting from a typepad blog" do
13
+ before(:all) do
14
+ @extracted_content = Extractula::DomExtractor.new.extract(
15
+ "http://www.pauldix.net/2009/10/typhoeus-the-best-ruby-http-client-just-got-better.html",
16
+ read_test_file("typhoeus-the-best-ruby-http-client-just-got-better.html"))
17
+ end
18
+
19
+ it "extracts the title" do
20
+ @extracted_content.title.should == "Paul Dix Explains Nothing: Typhoeus, the best Ruby HTTP client just got better"
21
+ end
22
+
23
+ it "extracts the content" do
24
+ @extracted_content.content.should == "<p>I've been quietly working on Typhoeus for the last few months. With the help of <a href=\"http://metaclass.org/\">Wilson Bilkovich</a> and <a href=\"http://github.com/dbalatero\">David Balatero</a> I've finished what I think is a significant improvement to the library. The new interface removes all the magic and opts instead for clarity.</p>\n<p>It's really slick and includes improved stubing support, caching, memoization, and (of course) parallelism. The <a href=\"http://github.com/pauldix/typhoeus/\">Typhoeus readme</a> highlights all of the awesomeness. It should be noted that the old interface of including Typhoeus into classes and defining remote methods has been deprecated. I'll be removing that sometime in the next six months.</p>\n<p>In addition to thanking everyone using the library and everyone contributing, I should also thank my employer kgbweb. If you're a solid Rubyist that likes parsing, crawling, and stuff, or a machine learning guy, or a Solr/Lucene/indexing bad ass, let me know. We need you and we're doing some crazy awesome stuff.</p>"
25
+ end
26
+ end
27
+
28
+ describe "extracting from wordpress - techcrunch" do
29
+ before(:all) do
30
+ @extracted_content = Extractula::DomExtractor.new.extract(
31
+ "http://www.techcrunch.com/2009/12/29/totlol-youtube/",
32
+ read_test_file("totlol-youtube.html"))
33
+ end
34
+
35
+ it "extracts the title" do
36
+ @extracted_content.title.should == "The Sad Tale Of Totlol And How YouTube’s Changing TOS Made It Hard To Make A Buck"
37
+ end
38
+
39
+ it "extracts the content" do
40
+ @extracted_content.content.should == Nokogiri::HTML(read_test_file("totlol-youtube.html")).css("div.entry").first.inner_html.strip
41
+ end
42
+ end
43
+
44
+ describe "extracting from wordpress - mashable" do
45
+ before(:all) do
46
+ @extracted_content = Extractula::DomExtractor.new.extract(
47
+ "http://mashable.com/2009/12/29/ustream-new-years-eve/",
48
+ read_test_file("ustream-new-years-eve.html"))
49
+ end
50
+
51
+ it "extracts the title" do
52
+ @extracted_content.title.should == "New Years Eve: Watch Live Celebrations on Ustream"
53
+ end
54
+
55
+ it "extracts the content" do
56
+ @extracted_content.content.should == Nokogiri::HTML(read_test_file("ustream-new-years-eve.html")).css("div.text-content").first.inner_html.strip
57
+ end
58
+
59
+ it "extracts content with a video embed" do
60
+ extracted = Extractula::DomExtractor.new.extract(
61
+ "http://mashable.com/2009/12/30/weather-channel-marriage-proposal-touching-with-a-chance-of-viral-status-video/",
62
+ read_test_file("weather-channel-marriage-proposal-touching-with-a-chance-of-viral-status-video.html"))
63
+ extracted.content.should == "<div style=\"float: left; margin-right: 10px; margin-bottom: 4px;\">\n<div class=\"wdt_button\"><iframe scrolling=\"no\" height=\"61\" frameborder=\"0\" width=\"50\" src=\"http://api.tweetmeme.com/widget.js?url=http://mashable.com/2009/12/30/weather-channel-marriage-proposal-touching-with-a-chance-of-viral-status-video/&amp;style=normal&amp;source=mashable&amp;service=bit.ly\"></iframe></div>\n<div class=\"wdt_button\" style=\"height:59px;\">\n<a name=\"fb_share\" type=\"box_count\" share_url=\"http://mashable.com/2009/12/30/weather-channel-marriage-proposal-touching-with-a-chance-of-viral-status-video/\"></a>\n</div>\n</div>\n<p><a href=\"http://mashable.com/wp-content/uploads/2009/12/weather.jpg\"><img src=\"http://mashable.com/wp-content/uploads/2009/12/weather.jpg\" alt=\"\" title=\"weather\" width=\"266\" height=\"184\" class=\"alignright size-full wp-image-174336\"></a>First <a href=\"http://mashable.com/tag/twitter/\">Twitter</a>, then Foursquare, now the Weather Channel? People are broadcasting their wedding proposals all over the place these days. </p>\n<p>That’s right, the other night Weather Channel meteorologist Kim Perez’s beau, police Sgt. Marty Cunningham (best name EVER), asked her to marry him during a routine forecast. Good thing she said yes, otherwise Cunningham’s disposition would have been cloudy with a serious chance of all-out mortification.<br><span id=\"more-174310\"></span></p>\n<p>Social media and viral videos have taken the place of the jumbotron when it comes to marriage proposals, allowing one to sound one’s not-so barbaric yawp over the roofs of the world. In today’s look-at-me society, public proposals are probably the least offensive byproduct. 
Meaning that even the most hardened of cynics can admit that they’re kind of sweet.</p>\n<p>Check out Cunningham’s proposal below (I personally enjoy that the weather map reads “<em>ring</em>ing in the New Year”), and then dive right into our list of even more social media wooers. What’s next? Entire domains dedicated to popping the question?</p>\n<p></p>\n<center>\n<object width=\"425\" height=\"344\"><param name=\"movie\" value=\"http://www.youtube.com/v/0dHTIGas4CA&amp;color1=0x3a3a3a&amp;color2=0x999999&amp;hl=en_US&amp;feature=player_embedded&amp;fs=1\">\n<param name=\"allowFullScreen\" value=\"true\">\n<param name=\"allowScriptAccess\" value=\"always\">\n<embed wmode=\"opaque\" src=\"http://www.youtube.com/v/0dHTIGas4CA&amp;color1=0x3a3a3a&amp;color2=0x999999&amp;hl=en_US&amp;feature=player_embedded&amp;fs=1\" type=\"application/x-shockwave-flash\" allowfullscreen=\"true\" allowscriptaccess=\"always\" width=\"425\" height=\"344\"></embed></object>\n<p></p>\n</center>\n<hr>\n<h2>More Wedding Bells and Whistles</h2>\n<hr>\n<p><a href=\"http://mashable.com/2009/08/28/mashable-marriage-proposal/\">CONGRATS: Mashable Marriage Proposal Live at #SocialGood [Video]</a></p>\n<p><a href=\"http://mashable.com/2009/12/19/foursquare-proposal/\">Man Proposes Marriage via Foursquare Check-In</a></p>\n<p><a href=\"http://mashable.com/2008/03/21/max-emily-twitter-proposal/\">Did We Just Witness a Twitter Marriage Proposal?</a></p>\n<p><a href=\"http://mashable.com/2009/06/30/twitter-marriage/\">Successful Marriage Proposal on Twitter Today: We #blamedrewscancer</a></p>\n<p><a href=\"http://mashable.com/2009/12/01/groom-facebook-update/\">Just Married: Groom Changes Facebook Relationship Status at the Altar [VIDEO]</a></p>"
64
+ end
65
+ end
66
+
67
+ describe "extracting from alleyinsider" do
68
+ before(:all) do
69
+ @extracted_content = Extractula::DomExtractor.new.extract(
70
+ "http://www.businessinsider.com/10-stunning-web-site-prototype-sketches-2009-12",
71
+ read_test_file("10-stunning-web-site-prototype-sketches.html"))
72
+ end
73
+
74
+ it "extracts the title" do
75
+ @extracted_content.title.should == "10 Stunning Web Site Prototype Sketches"
76
+ end
77
+
78
+ it "extracts the content" do
79
+ @extracted_content.content.should == Nokogiri::HTML(read_test_file("10-stunning-web-site-prototype-sketches.html")).css("div.KonaBody").first.inner_html.strip
80
+ end
81
+ end
82
+
83
+ describe "extracting from nytimes" do
84
+ before(:all) do
85
+ @front_page = Extractula::DomExtractor.new.extract(
86
+ "http://www.nytimes.com/",
87
+ read_test_file("nytimes.html"))
88
+ @story_page = Extractula::DomExtractor.new.extract(
89
+ "http://www.nytimes.com/2009/12/31/world/asia/31history.html?_r=1&hp",
90
+ read_test_file("nytimes_story.html"))
91
+ end
92
+
93
+ it "extracts the title" do
94
+ @front_page.title.should == "The New York Times - Breaking News, World News & Multimedia"
95
+ end
96
+
97
+ it "extracts the content" do
98
+ @front_page.content.should == Nokogiri::HTML(read_test_file("nytimes.html")).css("div.story").first.inner_html.strip
99
+ end
100
+
101
+ it "extracts a story title" do
102
+ @story_page.title.should == "Army Historians Document Early Missteps in Afghanistan - NYTimes.com"
103
+ end
104
+
105
+ it "extracts the story content" do
106
+ @story_page.content.should == Nokogiri::HTML(read_test_file("nytimes_story.html")).css("nyt_text").first.inner_html.strip
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper'
2
+
3
+ describe "extracted content" do
4
+ it "has a url" do
5
+ Extractula::ExtractedContent.new(:url => "http://pauldix.net").url.should == "http://pauldix.net"
6
+ end
7
+
8
+ it "has a title" do
9
+ Extractula::ExtractedContent.new(:title => "whatevs").title.should == "whatevs"
10
+ end
11
+
12
+ it "has content" do
13
+ Extractula::ExtractedContent.new(:content => "some content").content.should == "some content"
14
+ end
15
+
16
+ describe "summary" do
17
+ it "has a summary" do
18
+ Extractula::ExtractedContent.new(:summary => "a summary!").summary.should == "a summary!"
19
+ end
20
+
21
+ it "generates the summary from the content" do
22
+ extracted = Extractula::ExtractedContent.new(:content => "<p>I've been quietly working on Typhoeus for the last few months. With the help of <a href=\"http://metaclass.org/\">Wilson Bilkovich</a> and <a href=\"http://github.com/dbalatero\">David Balatero</a> I've finished what I think is a significant improvement to the library. The new interface removes all the magic and opts instead for clarity.</p>\n<p>It's really slick and includes improved stubing support, caching, memoization, and (of course) parallelism. The <a href=\"http://github.com/pauldix/typhoeus/\">Typhoeus readme</a> highlights all of the awesomeness. It should be noted that the old interface of including Typhoeus into classes and defining remote methods has been deprecated. I'll be removing that sometime in the next six months.</p>\n<p>In addition to thanking everyone using the library and everyone contributing, I should also thank my employer kgbweb. If you're a solid Rubyist that likes parsing, crawling, and stuff, or a machine learning guy, or a Solr/Lucene/indexing bad ass, let me know. We need you and we're doing some crazy awesome stuff.</p>")
23
+ extracted.summary.should == "I've been quietly working on Typhoeus for the last few months. With the help of Wilson Bilkovich and David Balatero I've finished what I think is a significant improvement to the library. The new interface removes all the magic and opts instead for clarity."
24
+ end
25
+ end
26
+
27
+ describe "image_urls" do
28
+ it "has image_urls" do
29
+ Extractula::ExtractedContent.new(:image_urls => ["first.jpg", "second.tiff"]).image_urls.should == ["first.jpg", "second.tiff"]
30
+ end
31
+
32
+ it "generates the image urls from the content" do
33
+ extracted = Extractula::ExtractedContent.new(:content => "<p><a href=\"http://www.businessinsider.com/10-stunning-web-site-prototype-sketches-2009-12/early-ember-1\"><img class=\"float_right\" src=\"http://static.businessinsider.com/~~/f?id=4b3a466f000000000086e662&amp;maxX=311&amp;maxY=233\" border=\"0\" height=\"233\" alt=\"Web site wireframes\" width=\"311\"></a></p>\n<div style=\"float: left; padding: 15px 15px 15px 0;\">\n\n</div>\n<p>When designers start a new Web site, they often sketch out a first idea of the page layout using paper and stencil.&nbsp;</p>\n<p>Designers call this sketch a \"wireframe.\"</p>\n<p>Woorkup.com's Antonio Lupetti <a href=\"http://woorkup.com/2009/12/28/10-beautiful-sketches-for-website-prototypes/\">collected</a> 10 beautiful examples of wireframes.</p>\n<p><a href=\"http://www.businessinsider.com/10-stunning-web-site-prototype-sketches-2009-12/early-ember-1\"><strong>He gave us permission to republish them here &gt;</strong></a></p>")
34
+ extracted.image_urls.should == ["http://static.businessinsider.com/~~/f?id=4b3a466f000000000086e662&maxX=311&maxY=233"]
35
+ end
36
+ end
37
+
38
+ describe "video_embed" do
39
+ it "has a video_embed" do
40
+ Extractula::ExtractedContent.new(:video_embed => "some embed code").video_embed.should == "some embed code"
41
+ end
42
+
43
+ it "pulls video embed tags from the content" do
44
+ extracted = Extractula::ExtractedContent.new(:content => "<div style=\"float: left; margin-right: 10px; margin-bottom: 4px;\">\n<div class=\"wdt_button\"><iframe scrolling=\"no\" height=\"61\" frameborder=\"0\" width=\"50\" src=\"http://api.tweetmeme.com/widget.js?url=http://mashable.com/2009/12/30/weather-channel-marriage-proposal-touching-with-a-chance-of-viral-status-video/&amp;style=normal&amp;source=mashable&amp;service=bit.ly\"></iframe></div>\n<div class=\"wdt_button\" style=\"height:59px;\">\n<a name=\"fb_share\" type=\"box_count\" share_url=\"http://mashable.com/2009/12/30/weather-channel-marriage-proposal-touching-with-a-chance-of-viral-status-video/\"></a>\n</div>\n</div>\n<p><a href=\"http://mashable.com/wp-content/uploads/2009/12/weather.jpg\"><img src=\"http://mashable.com/wp-content/uploads/2009/12/weather.jpg\" alt=\"\" title=\"weather\" width=\"266\" height=\"184\" class=\"alignright size-full wp-image-174336\"></a>First <a href=\"http://mashable.com/tag/twitter/\">Twitter</a>, then Foursquare, now the Weather Channel? People are broadcasting their wedding proposals all over the place these days. </p>\n<p>That’s right, the other night Weather Channel meteorologist Kim Perez’s beau, police Sgt. Marty Cunningham (best name EVER), asked her to marry him during a routine forecast. Good thing she said yes, otherwise Cunningham’s disposition would have been cloudy with a serious chance of all-out mortification.<br><span id=\"more-174310\"></span></p>\n<p>Social media and viral videos have taken the place of the jumbotron when it comes to marriage proposals, allowing one to sound one’s not-so barbaric yawp over the roofs of the world. In today’s look-at-me society, public proposals are probably the least offensive byproduct. 
Meaning that even the most hardened of cynics can admit that they’re kind of sweet.</p>\n<p>Check out Cunningham’s proposal below (I personally enjoy that the weather map reads “<em>ring</em>ing in the New Year”), and then dive right into our list of even more social media wooers. What’s next? Entire domains dedicated to popping the question?</p>\n<p></p>\n<center>\n<object width=\"425\" height=\"344\"><param name=\"movie\" value=\"http://www.youtube.com/v/0dHTIGas4CA&amp;color1=0x3a3a3a&amp;color2=0x999999&amp;hl=en_US&amp;feature=player_embedded&amp;fs=1\">\n<param name=\"allowFullScreen\" value=\"true\">\n<param name=\"allowScriptAccess\" value=\"always\">\n<embed wmode=\"opaque\" src=\"http://www.youtube.com/v/0dHTIGas4CA&amp;color1=0x3a3a3a&amp;color2=0x999999&amp;hl=en_US&amp;feature=player_embedded&amp;fs=1\" type=\"application/x-shockwave-flash\" allowfullscreen=\"true\" allowscriptaccess=\"always\" width=\"425\" height=\"344\"></embed></object>\n<p></p>\n</center>\n<hr>\n<h2>More Wedding Bells and Whistles</h2>\n<hr>\n<p><a href=\"http://mashable.com/2009/08/28/mashable-marriage-proposal/\">CONGRATS: Mashable Marriage Proposal Live at #SocialGood [Video]</a></p>\n<p><a href=\"http://mashable.com/2009/12/19/foursquare-proposal/\">Man Proposes Marriage via Foursquare Check-In</a></p>\n<p><a href=\"http://mashable.com/2008/03/21/max-emily-twitter-proposal/\">Did We Just Witness a Twitter Marriage Proposal?</a></p>\n<p><a href=\"http://mashable.com/2009/06/30/twitter-marriage/\">Successful Marriage Proposal on Twitter Today: We #blamedrewscancer</a></p>\n<p><a href=\"http://mashable.com/2009/12/01/groom-facebook-update/\">Just Married: Groom Changes Facebook Relationship Status at the Altar [VIDEO]</a></p>")
45
+ extracted.video_embed.should == "<object width=\"425\" height=\"344\"><param name=\"movie\" value=\"http://www.youtube.com/v/0dHTIGas4CA&amp;color1=0x3a3a3a&amp;color2=0x999999&amp;hl=en_US&amp;feature=player_embedded&amp;fs=1\">\n<param name=\"allowFullScreen\" value=\"true\">\n<param name=\"allowScriptAccess\" value=\"always\">\n<embed wmode=\"opaque\" src=\"http://www.youtube.com/v/0dHTIGas4CA&amp;color1=0x3a3a3a&amp;color2=0x999999&amp;hl=en_US&amp;feature=player_embedded&amp;fs=1\" type=\"application/x-shockwave-flash\" allowfullscreen=\"true\" allowscriptaccess=\"always\" width=\"425\" height=\"344\"></embed></object>"
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,45 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for the Extractula module-level registry and dispatch.
describe "extractula" do
  it "can add custom extractors" do
    custom_extractor = Class.new do
      def self.can_extract?(url, html)
        true
      end

      def extract(url, html)
        Extractula::ExtractedContent.new :url => "custom extractor url", :summary => "my custom extractor"
      end
    end

    Extractula.add_extractor custom_extractor
    begin
      content = Extractula.extract("http://pauldix.net", "some html")
      content.url.should == "custom extractor url"
      content.summary.should == "my custom extractor"
    ensure
      # Always unregister, so a failed expectation cannot leak this extractor
      # into the examples that follow.
      Extractula.remove_extractor custom_extractor
    end
  end

  it "skips custom extractors that can't extract the passed url and html" do
    custom_extractor = Class.new do
      def self.can_extract?(url, html)
        false
      end

      def extract(url, html)
        Extractula::ExtractedContent.new :url => "this url", :summary => "this summary"
      end
    end

    Extractula.add_extractor custom_extractor
    begin
      content = Extractula.extract("http://pauldix.net", "some html")
      content.url.should_not == "this url"
      content.summary.should_not == "this summary"
    ensure
      # Always unregister, even when an expectation above fails.
      Extractula.remove_extractor custom_extractor
    end
  end

  it "extracts from a url and document and returns an ExtractedContent object" do
    result = Extractula.extract("http://pauldix.net", "")
    result.should be_a Extractula::ExtractedContent
    result.url.should == "http://pauldix.net"
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,14 @@
1
+ require "rubygems"
2
+ require "spec"
3
+
4
+ # gem install redgreen for colored test output
5
+ begin require "redgreen" unless ENV['TM_CURRENT_LINE']; rescue LoadError; end
6
+
7
+ path = File.expand_path(File.dirname(__FILE__) + "/../lib/")
8
+ $LOAD_PATH.unshift(path) unless $LOAD_PATH.include?(path)
9
+
10
+ require "lib/extractula"
11
+
12
# Returns the contents of the fixture +file_name+ stored in the test-files
# directory next to this helper.
def read_test_file(file_name)
  fixtures_dir = "#{File.dirname(__FILE__)}/test-files"
  File.read("#{fixtures_dir}/#{file_name}")
end