term-extractor 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,3 @@
1
+ == 1.0.0 / 2008-05-28
2
+
3
+ * First release of Term Extractor, only providing access to the Yahoo Term Extractor API.
data/Manifest.txt ADDED
@@ -0,0 +1,24 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/term_extractor.rb
6
+ lib/term_extractor/exceptions.rb
7
+ lib/term_extractor/parsers/yahoo.rb
8
+ spec/assets/content.sample
9
+ spec/assets/yahoo_valid_response.xml
10
+ spec/spec_helper.rb
11
+ spec/term_extractor_spec.rb
12
+ tasks/ann.rake
13
+ tasks/bones.rake
14
+ tasks/gem.rake
15
+ tasks/manifest.rake
16
+ tasks/notes.rake
17
+ tasks/post_load.rake
18
+ tasks/rdoc.rake
19
+ tasks/rubyforge.rake
20
+ tasks/setup.rb
21
+ tasks/spec.rake
22
+ tasks/svn.rake
23
+ tasks/test.rake
24
+ test/test_term_extractor.rb
data/README.txt ADDED
@@ -0,0 +1,50 @@
1
+ Term Extractor
2
+ by Tom Taylor <tom@headshift.com>
3
+ http://www.headshift.com
4
+
5
+ == DESCRIPTION:
6
+
7
+ Extract terms from text using a selection of web based parsers.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * Just supports the Yahoo Term Extractor API at the moment. More soon.
12
+
13
+ == SYNOPSIS:
14
+
15
+ te = TermExtractor::Yahoo.new("api key here")
16
+ te.get_terms("content string here") # returns an array of terms
17
+
18
+ == REQUIREMENTS:
19
+
20
+ * Net::HTTP
21
+ * Hpricot
22
+
23
+ == INSTALL:
24
+
25
+ * gem install term-extractor
26
+
27
+ == LICENSE:
28
+
29
+ (The MIT License)
30
+
31
+ Copyright (c) 2008 Headshift
32
+
33
+ Permission is hereby granted, free of charge, to any person obtaining
34
+ a copy of this software and associated documentation files (the
35
+ 'Software'), to deal in the Software without restriction, including
36
+ without limitation the rights to use, copy, modify, merge, publish,
37
+ distribute, sublicense, and/or sell copies of the Software, and to
38
+ permit persons to whom the Software is furnished to do so, subject to
39
+ the following conditions:
40
+
41
+ The above copyright notice and this permission notice shall be
42
+ included in all copies or substantial portions of the Software.
43
+
44
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
45
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
46
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
47
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
48
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
49
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
50
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
+ # Look in the tasks/setup.rb file for the various options that can be
2
+ # configured in this Rakefile. The .rake files in the tasks directory
3
+ # are where the options are used.
4
+
5
+ load 'tasks/setup.rb'
6
+
7
+ ensure_in_path 'lib'
8
+ require 'term_extractor'
9
+
10
+ task :default => 'spec:run'
11
+
12
+ PROJ.name = 'term-extractor'
13
+ PROJ.authors = 'Tom Taylor'
14
+ PROJ.email = 'tom@headshift.com'
15
+ PROJ.url = 'http://termextractor.rubyforge.org'
16
+ PROJ.rubyforge.name = 'termextractor'
17
+ PROJ.version = TermExtractor::VERSION
18
+
19
+ PROJ.spec.opts << '--color'
20
+
21
+ depend_on 'hpricot'
22
+
23
+ # EOF
@@ -0,0 +1,56 @@
1
+ # $Id$
2
+
3
+ # Equivalent to a header guard in C/C++
4
+ # Used to prevent the class/module from being loaded more than once
5
+ unless defined? TermExtractor
6
+
7
# Top-level namespace for the Term Extractor library. Holds the version and
# path constants plus small helpers for locating and loading library files.
module TermExtractor

  # :stopdoc:
  # The constants are frozen because +libpath+ and +path+ hand them back
  # directly; a caller appending to the result must not corrupt them.
  VERSION = '1.0.0'.freeze
  LIBPATH = (::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR).freeze
  PATH = (::File.dirname(LIBPATH) + ::File::SEPARATOR).freeze
  # :startdoc:

  # Returns the version string for the library.
  #
  def self.version
    VERSION
  end

  # Returns the library path for the module. If any arguments are given,
  # they will be joined to the end of the library path using
  # <tt>File.join</tt>.
  #
  def self.libpath( *args )
    args.empty? ? LIBPATH : ::File.join(LIBPATH, *args)
  end

  # Returns the path for the module (the directory above the library path).
  # If any arguments are given, they will be joined to the end of the path
  # using <tt>File.join</tt>.
  #
  def self.path( *args )
    args.empty? ? PATH : ::File.join(PATH, *args)
  end

  # Utility method used to require all files ending in .rb that lie in the
  # directory below this file that has the same name as the filename passed
  # in. Optionally, a specific _directory_ name can be passed in such that
  # the _filename_ does not have to be equivalent to the directory.
  #
  # Files are required in sorted order so loading is deterministic.
  #
  def self.require_all_libs_relative_to( fname, dir = nil )
    dir ||= ::File.basename(fname, '.*')
    search_me = ::File.expand_path(
        ::File.join(::File.dirname(fname), dir, '**', '*.rb'))

    Dir.glob(search_me).sort.each {|rb| require rb}
  end

end  # module TermExtractor
51
+
52
+ TermExtractor.require_all_libs_relative_to __FILE__
53
+
54
+ end # unless defined?
55
+
56
+ # EOF
@@ -0,0 +1,6 @@
1
# Exception classes raised by the Term Extractor library.
module TermExtractor

  # Common ancestor of the library's own exceptions, so callers can rescue
  # every library-specific failure with a single clause.
  class Error < RuntimeError
  end

  # Signals that a remote term extraction service did not answer
  # successfully.
  class RemoteServerError < Error
  end

end
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'net/http'
3
+ require 'hpricot'
4
+
5
module TermExtractor

  # Interface to the {Yahoo Term Extractor API}[http://developer.yahoo.com/search/content/V1/termExtraction.html].

  class Yahoo

    HOST = "search.yahooapis.com"
    PATH = "/ContentAnalysisService/V1/termExtraction"

    # Create a new Yahoo Term Extractor instance. A valid API key must be provided.

    def initialize(api_key)
      @api_key = api_key
    end

    # Extracts terms from content provided. Provide an optional query to provide context for your terms.
    #
    # Posts the content to the service and returns an array with one entry
    # per result element of the response. Raises
    # TermExtractor::RemoteServerError, carrying the HTTP status code in its
    # message, when the response code is anything other than "200".

    def get_terms(content, query = nil)
      response = Net::HTTP.post_form(uri, query_hash(content, query))
      unless response.code == "200"
        raise RemoteServerError, "Yahoo Term Extractor returned HTTP #{response.code}"
      end
      parse_for_terms(response.body)
    end

    private

    # Parse XML from a string for terms. Returns the inner HTML of every
    # element matched by the "//result" search.

    def parse_for_terms(xml_string)
      Hpricot(xml_string).search("//result").map { |r| r.inner_html }
    end

    # Builds the endpoint URI from HOST and PATH.

    def uri
      URI::HTTP.build({ :host => HOST, :path => PATH })
    end

    # Builds the form fields for the POST request. The optional query is
    # only included when one was supplied, so no empty "query" field is
    # sent to the service.

    def query_hash(content, query)
      post_arguments = {
        'appid' => @api_key,
        'context' => content
      }
      post_arguments['query'] = query unless query.nil?
      post_arguments
    end

  end

end
@@ -0,0 +1 @@
1
+ English is a West Germanic language originating in England, and is the first language for most people in the Anglophone Caribbean, Australia, Canada, New Zealand, Ireland, the United Kingdom, and the United States (sometimes referred to as the Anglosphere). It is used extensively as a second language and as an official language throughout the world, especially in Commonwealth countries and in many international organizations. A native or fluent speaker of English is known as an Anglophone.
@@ -0,0 +1,5 @@
1
+ <ResultSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:yahoo:cate" xsi:schemaLocation="urn:yahoo:cate http://search.yahooapis.com/ContentAnalysisService/V1/TermExtractionResponse.xsd">
2
+ <Result>english</Result>
3
+ <Result>waffles</Result>
4
+ <Result>stuff like that</Result>
5
+ </ResultSet>
@@ -0,0 +1,22 @@
1
+ # $Id$
2
+
3
+ require File.expand_path(
4
+ File.join(File.dirname(__FILE__), %w[.. lib term_extractor]))
5
+
6
+ Spec::Runner.configure do |config|
7
+ # == Mock Framework
8
+ #
9
+ # RSpec uses its own mocking framework by default. If you prefer to
10
+ # use mocha, flexmock or RR, uncomment the appropriate line:
11
+ #
12
+ # config.mock_with :mocha
13
+ # config.mock_with :flexmock
14
+ # config.mock_with :rr
15
+ end
16
+
17
+ def asset_as_string(path)
18
+ file = File.join('spec/assets', path)
19
+ IO.read(file)
20
+ end
21
+
22
+ # EOF
@@ -0,0 +1,43 @@
1
+ # $Id$
2
+
3
+ require File.join(File.dirname(__FILE__), %w[spec_helper])
4
+
5
+ describe TermExtractor::Yahoo, "which is extracting the terms from some text" do
6
+
7
+ def mock_valid_response
8
+ mock(Net::HTTPOK, :code => "200", :body => asset_as_string("yahoo_valid_response.xml"))
9
+ end
10
+
11
+ def mock_invalid_response
12
+ mock(Net::HTTPInternalServerError, :code => "500", :body => "")
13
+ end
14
+
15
+ describe "and the server responses as expected" do
16
+ before(:each) do
17
+ Net::HTTP.should_receive(:post_form).and_return(mock_valid_response)
18
+ @te = TermExtractor::Yahoo.new("waffles")
19
+ end
20
+
21
+ it "should return the correct set of terms" do
22
+ content = asset_as_string("content.sample")
23
+ @te.get_terms(content).should == ["english", "waffles", "stuff like that"]
24
+ end
25
+ end
26
+
27
+ describe "and the server responds badly" do
28
+ before(:each) do
29
+ Net::HTTP.should_receive(:post_form).and_return(mock_invalid_response)
30
+ @te = TermExtractor::Yahoo.new("waffles")
31
+ end
32
+
33
+ it "should raise an error" do
34
+ content = asset_as_string("content.sample")
35
+ lambda {@te.get_terms(content)}.should raise_error(TermExtractor::RemoteServerError)
36
+ end
37
+
38
+ end
39
+
40
+
41
+ end
42
+
43
+ # EOF
data/tasks/ann.rake ADDED
@@ -0,0 +1,81 @@
1
+ # $Id$
2
+
3
+ begin
4
+ require 'bones/smtp_tls'
5
+ rescue LoadError
6
+ require 'net/smtp'
7
+ end
8
+ require 'time'
9
+
10
+ namespace :ann do
11
+
12
+ # A prerequisites task that all other tasks depend upon
13
+ task :prereqs
14
+
15
+ file PROJ.ann.file do
16
+ ann = PROJ.ann
17
+ puts "Generating #{ann.file}"
18
+ File.open(ann.file,'w') do |fd|
19
+ fd.puts("#{PROJ.name} version #{PROJ.version}")
20
+ fd.puts(" by #{Array(PROJ.authors).first}") if PROJ.authors
21
+ fd.puts(" #{PROJ.url}") if PROJ.url.valid?
22
+ fd.puts(" (the \"#{PROJ.release_name}\" release)") if PROJ.release_name
23
+ fd.puts
24
+ fd.puts("== DESCRIPTION")
25
+ fd.puts
26
+ fd.puts(PROJ.description)
27
+ fd.puts
28
+ fd.puts(PROJ.changes.sub(%r/^.*$/, '== CHANGES'))
29
+ fd.puts
30
+ ann.paragraphs.each do |p|
31
+ fd.puts "== #{p.upcase}"
32
+ fd.puts
33
+ fd.puts paragraphs_of(PROJ.readme_file, p).join("\n\n")
34
+ fd.puts
35
+ end
36
+ fd.puts ann.text if ann.text
37
+ end
38
+ end
39
+
40
+ desc "Create an announcement file"
41
+ task :announcement => ['ann:prereqs', PROJ.ann.file]
42
+
43
+ desc "Send an email announcement"
44
+ task :email => ['ann:prereqs', PROJ.ann.file] do
45
+ ann = PROJ.ann
46
+ from = ann.email[:from] || PROJ.email
47
+ to = Array(ann.email[:to])
48
+
49
+ ### build a mail header for RFC 822
50
+ rfc822msg = "From: #{from}\n"
51
+ rfc822msg << "To: #{to.join(',')}\n"
52
+ rfc822msg << "Subject: [ANN] #{PROJ.name} #{PROJ.version}"
53
+ rfc822msg << " (#{PROJ.release_name})" if PROJ.release_name
54
+ rfc822msg << "\n"
55
+ rfc822msg << "Date: #{Time.new.rfc822}\n"
56
+ rfc822msg << "Message-Id: "
57
+ rfc822msg << "<#{"%.8f" % Time.now.to_f}@#{ann.email[:domain]}>\n\n"
58
+ rfc822msg << File.read(ann.file)
59
+
60
+ params = [:server, :port, :domain, :acct, :passwd, :authtype].map do |key|
61
+ ann.email[key]
62
+ end
63
+
64
+ params[3] = PROJ.email if params[3].nil?
65
+
66
+ if params[4].nil?
67
+ STDOUT.write "Please enter your e-mail password (#{params[3]}): "
68
+ params[4] = STDIN.gets.chomp
69
+ end
70
+
71
+ ### send email
72
+ Net::SMTP.start(*params) {|smtp| smtp.sendmail(rfc822msg, from, to)}
73
+ end
74
+ end # namespace :ann
75
+
76
+ desc 'Alias to ann:announcement'
77
+ task :ann => 'ann:announcement'
78
+
79
+ CLOBBER << PROJ.ann.file
80
+
81
+ # EOF
data/tasks/bones.rake ADDED
@@ -0,0 +1,21 @@
1
+ # $Id$
2
+
3
+ if HAVE_BONES
4
+
5
+ namespace :bones do
6
+
7
+ desc 'Show the PROJ open struct'
8
+ task :debug do |t|
9
+ atr = if t.application.top_level_tasks.length == 2
10
+ t.application.top_level_tasks.pop
11
+ end
12
+
13
+ if atr then Bones::Debug.show_attr(PROJ, atr)
14
+ else Bones::Debug.show PROJ end
15
+ end
16
+
17
+ end # namespace :bones
18
+
19
+ end # HAVE_BONES
20
+
21
+ # EOF
data/tasks/gem.rake ADDED
@@ -0,0 +1,121 @@
1
+ # $Id$
2
+
3
+ require 'rake/gempackagetask'
4
+
5
+ namespace :gem do
6
+
7
+ PROJ.gem._spec = Gem::Specification.new do |s|
8
+ s.name = PROJ.name
9
+ s.version = PROJ.version
10
+ s.summary = PROJ.summary
11
+ s.authors = Array(PROJ.authors)
12
+ s.email = PROJ.email
13
+ s.homepage = Array(PROJ.url).first
14
+ s.rubyforge_project = PROJ.rubyforge.name
15
+
16
+ s.description = PROJ.description
17
+
18
+ PROJ.gem.dependencies.each do |dep|
19
+ s.add_dependency(*dep)
20
+ end
21
+
22
+ s.files = PROJ.gem.files
23
+ s.executables = PROJ.gem.executables.map {|fn| File.basename(fn)}
24
+ s.extensions = PROJ.gem.files.grep %r/extconf\.rb$/
25
+
26
+ s.bindir = 'bin'
27
+ dirs = Dir["{#{PROJ.libs.join(',')}}"]
28
+ s.require_paths = dirs unless dirs.empty?
29
+
30
+ incl = Regexp.new(PROJ.rdoc.include.join('|'))
31
+ excl = PROJ.rdoc.exclude.dup.concat %w[\.rb$ ^(\.\/|\/)?ext]
32
+ excl = Regexp.new(excl.join('|'))
33
+ rdoc_files = PROJ.gem.files.find_all do |fn|
34
+ case fn
35
+ when excl; false
36
+ when incl; true
37
+ else false end
38
+ end
39
+ s.rdoc_options = PROJ.rdoc.opts + ['--main', PROJ.rdoc.main]
40
+ s.extra_rdoc_files = rdoc_files
41
+ s.has_rdoc = true
42
+
43
+ if test ?f, PROJ.test.file
44
+ s.test_file = PROJ.test.file
45
+ else
46
+ s.test_files = PROJ.test.files.to_a
47
+ end
48
+
49
+ # Do any extra stuff the user wants
50
+ PROJ.gem.extras.each do |msg, val|
51
+ case val
52
+ when Proc
53
+ val.call(s.send(msg))
54
+ else
55
+ s.send "#{msg}=", val
56
+ end
57
+ end
58
+ end # Gem::Specification.new
59
+
60
+ # A prerequisites task that all other tasks depend upon
61
+ task :prereqs
62
+
63
+ desc 'Show information about the gem'
64
+ task :debug => 'gem:prereqs' do
65
+ puts PROJ.gem._spec.to_ruby
66
+ end
67
+
68
+ pkg = Rake::PackageTask.new(PROJ.name, PROJ.version) do |pkg|
69
+ pkg.need_tar = PROJ.gem.need_tar
70
+ pkg.need_zip = PROJ.gem.need_zip
71
+ pkg.package_files += PROJ.gem._spec.files
72
+ end
73
+ Rake::Task['gem:package'].instance_variable_set(:@full_comment, nil)
74
+
75
+ gem_file = if PROJ.gem._spec.platform == Gem::Platform::RUBY
76
+ "#{pkg.package_name}.gem"
77
+ else
78
+ "#{pkg.package_name}-#{PROJ.gem._spec.platform}.gem"
79
+ end
80
+
81
+ desc "Build the gem file #{gem_file}"
82
+ task :package => ['gem:prereqs', "#{pkg.package_dir}/#{gem_file}"]
83
+
84
+ file "#{pkg.package_dir}/#{gem_file}" => [pkg.package_dir] + PROJ.gem._spec.files do
85
+ when_writing("Creating GEM") {
86
+ Gem::Builder.new(PROJ.gem._spec).build
87
+ verbose(true) {
88
+ mv gem_file, "#{pkg.package_dir}/#{gem_file}"
89
+ }
90
+ }
91
+ end
92
+
93
+ desc 'Install the gem'
94
+ task :install => [:clobber, :package] do
95
+ sh "#{SUDO} #{GEM} install --local pkg/#{PROJ.gem._spec.full_name}"
96
+
97
+ # use this version of the command for rubygems > 1.0.0
98
+ #sh "#{SUDO} #{GEM} install --no-update-sources pkg/#{PROJ.gem._spec.full_name}"
99
+ end
100
+
101
+ desc 'Uninstall the gem'
102
+ task :uninstall do
103
+ installed_list = Gem.source_index.find_name(PROJ.name)
104
+ if installed_list and installed_list.collect { |s| s.version.to_s}.include?(PROJ.version) then
105
+ sh "#{SUDO} #{GEM} uninstall --version '#{PROJ.version}' --ignore-dependencies --executables #{PROJ.name}"
106
+ end
107
+ end
108
+
109
+ desc 'Reinstall the gem'
110
+ task :reinstall => [:uninstall, :install]
111
+
112
+ end # namespace :gem
113
+
114
+ desc 'Alias to gem:package'
115
+ task :gem => 'gem:package'
116
+
117
+ task :clobber => 'gem:clobber_package'
118
+
119
+ remove_desc_for_task %w(gem:clobber_package)
120
+
121
+ # EOF
@@ -0,0 +1,49 @@
1
+ # $Id$
2
+
3
+ require 'find'
4
+
5
+ namespace :manifest do
6
+
7
+ desc 'Verify the manifest'
8
+ task :check do
9
+ fn = PROJ.manifest_file + '.tmp'
10
+ files = manifest_files
11
+
12
+ File.open(fn, 'w') {|fp| fp.puts files}
13
+ lines = %x(#{DIFF} -du #{PROJ.manifest_file} #{fn}).split("\n")
14
+ if HAVE_FACETS_ANSICODE and ENV.has_key?('TERM')
15
+ lines.map! do |line|
16
+ case line
17
+ when %r/^(-{3}|\+{3})/; nil
18
+ when %r/^@/; Console::ANSICode.blue line
19
+ when %r/^\+/; Console::ANSICode.green line
20
+ when %r/^\-/; Console::ANSICode.red line
21
+ else line end
22
+ end
23
+ end
24
+ puts lines.compact
25
+ rm fn rescue nil
26
+ end
27
+
28
+ desc 'Create a new manifest'
29
+ task :create do
30
+ files = manifest_files
31
+ unless test(?f, PROJ.manifest_file)
32
+ files << PROJ.manifest_file
33
+ files.sort!
34
+ end
35
+ File.open(PROJ.manifest_file, 'w') {|fp| fp.puts files}
36
+ end
37
+
38
+ task :assert do
39
+ files = manifest_files
40
+ manifest = File.read(PROJ.manifest_file).split($/)
41
+ raise "ERROR: #{PROJ.manifest_file} is out of date" unless files == manifest
42
+ end
43
+
44
+ end # namespace :manifest
45
+
46
+ desc 'Alias to manifest:check'
47
+ task :manifest => 'manifest:check'
48
+
49
+ # EOF
data/tasks/notes.rake ADDED
@@ -0,0 +1,28 @@
1
+ # $Id$
2
+
3
+ if HAVE_BONES
4
+
5
+ desc "Enumerate all annotations"
6
+ task :notes do |t|
7
+ id = if t.application.top_level_tasks.length > 1
8
+ t.application.top_level_tasks.slice!(1..-1).join(' ')
9
+ end
10
+ Bones::AnnotationExtractor.enumerate(
11
+ PROJ, PROJ.notes.tags.join('|'), id, :tag => true)
12
+ end
13
+
14
+ namespace :notes do
15
+ PROJ.notes.tags.each do |tag|
16
+ desc "Enumerate all #{tag} annotations"
17
+ task tag.downcase.to_sym do |t|
18
+ id = if t.application.top_level_tasks.length > 1
19
+ t.application.top_level_tasks.slice!(1..-1).join(' ')
20
+ end
21
+ Bones::AnnotationExtractor.enumerate(PROJ, tag, id)
22
+ end
23
+ end
24
+ end
25
+
26
+ end # if HAVE_BONES
27
+
28
+ # EOF
@@ -0,0 +1,39 @@
1
+ # $Id$
2
+
3
+ # This file does not define any rake tasks. It is used to load some project
4
+ # settings if they are not defined by the user.
5
+
6
+ PROJ.rdoc.exclude << "^#{Regexp.escape(PROJ.manifest_file)}$"
7
+ PROJ.exclude << ["^#{Regexp.escape(PROJ.ann.file)}$",
8
+ "^#{Regexp.escape(PROJ.rdoc.dir)}/",
9
+ "^#{Regexp.escape(PROJ.rcov.dir)}/"]
10
+
11
+ flatten_arrays = lambda do |this,os|
12
+ os.instance_variable_get(:@table).each do |key,val|
13
+ next if key == :dependencies
14
+ case val
15
+ when Array; val.flatten!
16
+ when OpenStruct; this.call(this,val)
17
+ end
18
+ end
19
+ end
20
+ flatten_arrays.call(flatten_arrays,PROJ)
21
+
22
+ PROJ.changes ||= paragraphs_of(PROJ.history_file, 0..1).join("\n\n")
23
+
24
+ PROJ.description ||= paragraphs_of(PROJ.readme_file, 'description').join("\n\n")
25
+
26
+ PROJ.summary ||= PROJ.description.split('.').first
27
+
28
+ PROJ.gem.files ||=
29
+ if test(?f, PROJ.manifest_file)
30
+ files = File.readlines(PROJ.manifest_file).map {|fn| fn.chomp.strip}
31
+ files.delete ''
32
+ files
33
+ else [] end
34
+
35
+ PROJ.gem.executables ||= PROJ.gem.files.find_all {|fn| fn =~ %r/^bin/}
36
+
37
+ PROJ.rdoc.main ||= PROJ.readme_file
38
+
39
+ # EOF
data/tasks/rdoc.rake ADDED
@@ -0,0 +1,51 @@
1
+ # $Id$
2
+
3
+ require 'rake/rdoctask'
4
+
5
+ namespace :doc do
6
+
7
+ desc 'Generate RDoc documentation'
8
+ Rake::RDocTask.new do |rd|
9
+ rdoc = PROJ.rdoc
10
+ rd.main = rdoc.main
11
+ rd.rdoc_dir = rdoc.dir
12
+
13
+ incl = Regexp.new(rdoc.include.join('|'))
14
+ excl = Regexp.new(rdoc.exclude.join('|'))
15
+ files = PROJ.gem.files.find_all do |fn|
16
+ case fn
17
+ when excl; false
18
+ when incl; true
19
+ else false end
20
+ end
21
+ rd.rdoc_files.push(*files)
22
+
23
+ title = "#{PROJ.name}-#{PROJ.version} Documentation"
24
+
25
+ rf_name = PROJ.rubyforge.name
26
+ title = "#{rf_name}'s " + title if rf_name.valid? and rf_name != title
27
+
28
+ rd.options << "-t #{title}"
29
+ rd.options.concat(rdoc.opts)
30
+ end
31
+
32
+ desc 'Generate ri locally for testing'
33
+ task :ri => :clobber_ri do
34
+ sh "#{RDOC} --ri -o ri ."
35
+ end
36
+
37
+ task :clobber_ri do
38
+ rm_r 'ri' rescue nil
39
+ end
40
+
41
+ end # namespace :doc
42
+
43
+ desc 'Alias to doc:rdoc'
44
+ task :doc => 'doc:rdoc'
45
+
46
+ desc 'Remove all build products'
47
+ task :clobber => %w(doc:clobber_rdoc doc:clobber_ri)
48
+
49
+ remove_desc_for_task %w(doc:clobber_rdoc)
50
+
51
+ # EOF
@@ -0,0 +1,57 @@
1
+ # $Id$
2
+
3
+ if PROJ.rubyforge.name.valid? && HAVE_RUBYFORGE
4
+
5
+ require 'rubyforge'
6
+ require 'rake/contrib/sshpublisher'
7
+
8
+ namespace :gem do
9
+ desc 'Package and upload to RubyForge'
10
+ task :release => [:clobber, :package] do |t|
11
+ v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
12
+ abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version
13
+ pkg = "pkg/#{PROJ.gem._spec.full_name}"
14
+
15
+ if $DEBUG then
16
+ puts "release_id = rf.add_release #{PROJ.rubyforge.name.inspect}, #{PROJ.name.inspect}, #{PROJ.version.inspect}, \"#{pkg}.tgz\""
17
+ puts "rf.add_file #{PROJ.rubyforge.name.inspect}, #{PROJ.name.inspect}, release_id, \"#{pkg}.gem\""
18
+ end
19
+
20
+ rf = RubyForge.new
21
+ puts 'Logging in'
22
+ rf.login
23
+
24
+ c = rf.userconfig
25
+ c['release_notes'] = PROJ.description if PROJ.description
26
+ c['release_changes'] = PROJ.changes if PROJ.changes
27
+ c['preformatted'] = true
28
+
29
+ files = [(PROJ.gem.need_tar ? "#{pkg}.tgz" : nil),
30
+ (PROJ.gem.need_zip ? "#{pkg}.zip" : nil),
31
+ "#{pkg}.gem"].compact
32
+
33
+ puts "Releasing #{PROJ.name} v. #{PROJ.version}"
34
+ rf.add_release PROJ.rubyforge.name, PROJ.name, PROJ.version, *files
35
+ end
36
+ end # namespace :gem
37
+
38
+
39
+ namespace :doc do
40
+ desc "Publish RDoc to RubyForge"
41
+ task :release => %w(doc:clobber_rdoc doc:rdoc) do
42
+ config = YAML.load(
43
+ File.read(File.expand_path('~/.rubyforge/user-config.yml'))
44
+ )
45
+
46
+ host = "#{config['username']}@rubyforge.org"
47
+ remote_dir = "/var/www/gforge-projects/#{PROJ.rubyforge.name}/"
48
+ remote_dir << PROJ.rdoc.remote_dir if PROJ.rdoc.remote_dir
49
+ local_dir = PROJ.rdoc.dir
50
+
51
+ Rake::SshDirPublisher.new(host, remote_dir, local_dir).upload
52
+ end
53
+ end # namespace :doc
54
+
55
+ end # if HAVE_RUBYFORGE
56
+
57
+ # EOF
data/tasks/setup.rb ADDED
@@ -0,0 +1,266 @@
1
+ # $Id$
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+ require 'rake/clean'
6
+ require 'fileutils'
7
+ require 'ostruct'
8
+
9
+ class OpenStruct; undef :gem; end
10
+
11
+ PROJ = OpenStruct.new(
12
+ # Project Defaults
13
+ :name => nil,
14
+ :summary => nil,
15
+ :description => nil,
16
+ :changes => nil,
17
+ :authors => nil,
18
+ :email => nil,
19
+ :url => "\000",
20
+ :version => ENV['VERSION'] || '0.0.0',
21
+ :exclude => %w(tmp$ bak$ ~$ CVS .svn/ ^pkg/),
22
+ :release_name => ENV['RELEASE'],
23
+
24
+ # System Defaults
25
+ :ruby_opts => %w(-w),
26
+ :libs => [],
27
+ :history_file => 'History.txt',
28
+ :manifest_file => 'Manifest.txt',
29
+ :readme_file => 'README.txt',
30
+
31
+ # Announce
32
+ :ann => OpenStruct.new(
33
+ :file => 'announcement.txt',
34
+ :text => nil,
35
+ :paragraphs => [],
36
+ :email => {
37
+ :from => nil,
38
+ :to => %w(ruby-talk@ruby-lang.org),
39
+ :server => 'localhost',
40
+ :port => 25,
41
+ :domain => ENV['HOSTNAME'],
42
+ :acct => nil,
43
+ :passwd => nil,
44
+ :authtype => :plain
45
+ }
46
+ ),
47
+
48
+ # Gem Packaging
49
+ :gem => OpenStruct.new(
50
+ :dependencies => [],
51
+ :executables => nil,
52
+ :extensions => FileList['ext/**/extconf.rb'],
53
+ :files => nil,
54
+ :need_tar => true,
55
+ :need_zip => false,
56
+ :extras => {}
57
+ ),
58
+
59
+ # File Annotations
60
+ :notes => OpenStruct.new(
61
+ :exclude => %w(^tasks/setup.rb$),
62
+ :extensions => %w(.txt .rb .erb) << '',
63
+ :tags => %w(FIXME OPTIMIZE TODO)
64
+ ),
65
+
66
+ # Rcov
67
+ :rcov => OpenStruct.new(
68
+ :dir => 'coverage',
69
+ :opts => %w[--sort coverage -T],
70
+ :threshold => 90.0,
71
+ :threshold_exact => false
72
+ ),
73
+
74
+ # Rdoc
75
+ :rdoc => OpenStruct.new(
76
+ :opts => [],
77
+ :include => %w(^lib/ ^bin/ ^ext/ .txt$),
78
+ :exclude => %w(extconf.rb$),
79
+ :main => nil,
80
+ :dir => 'doc',
81
+ :remote_dir => nil
82
+ ),
83
+
84
+ # Rubyforge
85
+ :rubyforge => OpenStruct.new(
86
+ :name => "\000"
87
+ ),
88
+
89
+ # Rspec
90
+ :spec => OpenStruct.new(
91
+ :files => FileList['spec/**/*_spec.rb'],
92
+ :opts => []
93
+ ),
94
+
95
+ # Subversion Repository
96
+ :svn => OpenStruct.new(
97
+ :root => nil,
98
+ :path => '',
99
+ :trunk => 'trunk',
100
+ :tags => 'tags',
101
+ :branches => 'branches'
102
+ ),
103
+
104
+ # Test::Unit
105
+ :test => OpenStruct.new(
106
+ :files => FileList['test/**/test_*.rb'],
107
+ :file => 'test/all.rb',
108
+ :opts => []
109
+ )
110
+ )
111
+
112
+ # Load the other rake files in the tasks folder
113
+ rakefiles = Dir.glob('tasks/*.rake').sort
114
+ rakefiles.unshift(rakefiles.delete('tasks/post_load.rake')).compact!
115
+ import(*rakefiles)
116
+
117
+ # Setup the project libraries
118
+ %w(lib ext).each {|dir| PROJ.libs << dir if test ?d, dir}
119
+
120
+ # Setup some constants
121
+ WIN32 = %r/djgpp|(cyg|ms|bcc)win|mingw/ =~ RUBY_PLATFORM unless defined? WIN32
122
+
123
+ DEV_NULL = WIN32 ? 'NUL:' : '/dev/null'
124
+
125
# Runs the given block with STDOUT and STDERR redirected to DEV_NULL and
# returns the block's value. The original streams are restored even when
# the block raises.
#
def quiet( &block )
  # Keep duplicates of the real streams so they can be restored afterwards.
  io = [STDOUT.dup, STDERR.dup]
  begin
    STDOUT.reopen DEV_NULL
    STDERR.reopen DEV_NULL
    block.call
  ensure
    STDOUT.reopen io.first
    STDERR.reopen io.last
    $stdout, $stderr = STDOUT, STDERR
    # The duplicates are no longer needed once the streams are restored;
    # close them so repeated calls do not accumulate open descriptors.
    io.each {|saved| saved.close}
  end
end
135
+
136
+ DIFF = if WIN32 then 'diff.exe'
137
+ else
138
+ if quiet {system "gdiff", __FILE__, __FILE__} then 'gdiff'
139
+ else 'diff' end
140
+ end unless defined? DIFF
141
+
142
+ SUDO = if WIN32 then ''
143
+ else
144
+ if quiet {system 'which sudo'} then 'sudo'
145
+ else '' end
146
+ end
147
+
148
+ RCOV = WIN32 ? 'rcov.bat' : 'rcov'
149
+ RDOC = WIN32 ? 'rdoc.bat' : 'rdoc'
150
+ GEM = WIN32 ? 'gem.bat' : 'gem'
151
+
152
+ %w(rcov spec/rake/spectask rubyforge bones facets/ansicode).each do |lib|
153
+ begin
154
+ require lib
155
+ Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", true}
156
+ rescue LoadError
157
+ Object.instance_eval {const_set "HAVE_#{lib.tr('/','_').upcase}", false}
158
+ end
159
+ end
160
# Truthy only when the working directory contains a .svn entry and the svn
# command runs successfully. Redirections are applied left to right, so
# stdout is sent to the null device first and stderr is then pointed at the
# same place; the reverse order would leave stderr attached to the terminal.
HAVE_SVN = (Dir.entries(Dir.pwd).include?('.svn') and
            system("svn --version > #{DEV_NULL} 2>&1"))
162
+
163
# Reads the file at +path+, splits it into paragraphs on blank lines and
# returns an array holding the requested +paragraphs+ (indexes or ranges).
#
# When the first selector is a String it is treated as a section title:
# only the paragraphs between the heading matching that title and the next
# heading are considered, and all of them are returned if no further
# selectors are given.
#
#    changes = paragraphs_of('History.txt', 0..1).join("\n\n")
#    summary, *description = paragraphs_of('README.txt', 3, 3..8)
#
def paragraphs_of( path, *paragraphs )
  heading = paragraphs.first.is_a?(String) ? paragraphs.shift : nil
  chunks = File.read(path).delete("\r").split(/\n\n+/)

  # Without a title the selectors index straight into the whole file.
  return chunks.values_at(*paragraphs) unless heading

  paragraphs << (0..-1) if paragraphs.empty?
  marker = %r/^=+\s*#{Regexp.escape(heading)}/i
  section = []
  inside = false

  chunks.each do |chunk|
    if chunk =~ marker
      # A second heading closes the section being collected.
      break if inside
      inside = true
      # From here on any heading at all ends the section.
      marker = %r/^=+/i
    elsif inside
      section << chunk
    end
  end

  section.values_at(*paragraphs)
end
192
+
193
# Adds the given gem _name_ to the current project's dependency list. An
# optional gem _version_ can be given. If omitted, the newest installed
# version of the gem is used; if the gem is not installed the dependency is
# recorded without a version requirement.
#
# A _version_ that starts with a digit is recorded as ">= version"; any
# other string is recorded as the requirement unchanged.
#
def depend_on( name, version = nil )
  if version.nil?
    # Gem.source_index is not available on current RubyGems releases, so
    # prefer Gem::Specification when it offers the lookup and fall back to
    # the source index only on old RubyGems.
    spec = if Gem::Specification.respond_to?(:find_all_by_name)
        Gem::Specification.find_all_by_name(name).max_by {|s| s.version}
      else
        Gem.source_index.find_name(name).last
      end
    version = spec.version.to_s unless spec.nil?
  end

  PROJ.gem.dependencies << case version
    when nil; [name]
    when %r/^\d/; [name, ">= #{version}"]
    else [name, version] end
end
206
+
207
# Puts each of the given directories at the front of the load path. An
# argument is expanded to an absolute path first, and is skipped when it is
# not an existing directory or is already on the load path.
#
def ensure_in_path( *args )
  args.each do |entry|
    dir = File.expand_path(entry)
    next unless test(?d, dir)
    $:.unshift(dir) unless $:.include?(dir)
  end
end
215
+
216
# Find a rake task using the task name and remove any description text. This
# will prevent the task from being displayed in the list of available tasks.
#
# _names_ may be a single task name or an array of names; names that do not
# match a defined task are ignored.
#
def remove_desc_for_task( names )
  Array(names).each do |task_name|
    task = Rake.application.tasks.find {|t| t.name == task_name}
    next if task.nil?
    # Rake versions that provide clear_comments do not necessarily keep the
    # description in @comment, so use the public method when it exists and
    # only fall back to clearing the instance variable otherwise.
    if task.respond_to?(:clear_comments)
      task.clear_comments
    else
      task.instance_variable_set :@comment, nil
    end
  end
end
226
+
227
# Runs the _block_ with _dir_ as the working directory and returns the
# block's value. The directory that was current when this method was called
# is restored afterwards, whether the block finishes normally or raises.
#
def in_directory( dir, &block )
  origin = pwd
  begin
    cd(dir)
    block.call
  ensure
    cd(origin)
  end
end
240
+
241
# Walks the current working directory and returns a sorted list of the
# files that are candidates for the manifest. Paths are reported without a
# leading "./", and any path matching one of the PROJ.exclude patterns is
# left out.
#
def manifest_files
  exclude = Regexp.new(PROJ.exclude.join('|'))
  candidates = []
  Find.find('.') do |path|
    path.sub!(%r/^(\.\/|\/)/o, '')
    candidates << path if test(?f, path) && path !~ exclude
  end
  candidates.sort!
end
255
+
256
# We need a "valid" method that determines if a string is suitable for use
# in the gem specification.
#
# Objects that do not respond to +to_str+ are never valid. A string-like
# object is valid unless it is empty or equal to the "\000" placeholder.
#
class Object
  def valid?
    return false unless respond_to?(:to_str)
    !(empty? || self == "\000")
  end
end
265
+
266
+ # EOF
data/tasks/spec.rake ADDED
@@ -0,0 +1,55 @@
1
+ # $Id$
2
+
3
+ if HAVE_SPEC_RAKE_SPECTASK
4
+ require 'spec/rake/verify_rcov'
5
+
6
+ namespace :spec do
7
+
8
+ desc 'Run all specs with basic output'
9
+ Spec::Rake::SpecTask.new(:run) do |t|
10
+ t.ruby_opts = PROJ.ruby_opts
11
+ t.spec_opts = PROJ.spec.opts
12
+ t.spec_files = PROJ.spec.files
13
+ t.libs += PROJ.libs
14
+ end
15
+
16
+ desc 'Run all specs with text output'
17
+ Spec::Rake::SpecTask.new(:specdoc) do |t|
18
+ t.ruby_opts = PROJ.ruby_opts
19
+ t.spec_opts = PROJ.spec.opts + ['--format', 'specdoc']
20
+ t.spec_files = PROJ.spec.files
21
+ t.libs += PROJ.libs
22
+ end
23
+
24
+ if HAVE_RCOV
25
+ desc 'Run all specs with RCov'
26
+ Spec::Rake::SpecTask.new(:rcov) do |t|
27
+ t.ruby_opts = PROJ.ruby_opts
28
+ t.spec_opts = PROJ.spec.opts
29
+ t.spec_files = PROJ.spec.files
30
+ t.libs += PROJ.libs
31
+ t.rcov = true
32
+ t.rcov_dir = PROJ.rcov.dir
33
+ t.rcov_opts = PROJ.rcov.opts + ['--exclude', 'spec']
34
+ end
35
+
36
+ RCov::VerifyTask.new(:verify) do |t|
37
+ t.threshold = PROJ.rcov.threshold
38
+ t.index_html = File.join(PROJ.rcov.dir, 'index.html')
39
+ t.require_exact_threshold = PROJ.rcov.threshold_exact
40
+ end
41
+
42
+ task :verify => :rcov
43
+ remove_desc_for_task %w(spec:clobber_rcov)
44
+ end
45
+
46
+ end # namespace :spec
47
+
48
+ desc 'Alias to spec:run'
49
+ task :spec => 'spec:run'
50
+
51
+ task :clobber => 'spec:clobber_rcov' if HAVE_RCOV
52
+
53
+ end # if HAVE_SPEC_RAKE_SPECTASK
54
+
55
+ # EOF
data/tasks/svn.rake ADDED
@@ -0,0 +1,48 @@
1
+ # $Id$
2
+
3
+ if HAVE_SVN
4
+
5
+ unless PROJ.svn.root
6
+ info = %x/svn info ./
7
+ m = %r/^Repository Root:\s+(.*)$/.match(info)
8
+ PROJ.svn.root = (m.nil? ? '' : m[1])
9
+ end
10
+ PROJ.svn.root = File.join(PROJ.svn.root, PROJ.svn.path) unless PROJ.svn.path.empty?
11
+
12
+ namespace :svn do
13
+
14
+ # A prerequisites task that all other tasks depend upon
15
+ task :prereqs
16
+
17
+ desc 'Show tags from the SVN repository'
18
+ task :show_tags => 'svn:prereqs' do |t|
19
+ tags = %x/svn list #{File.join(PROJ.svn.root, PROJ.svn.tags)}/
20
+ tags.gsub!(%r/\/$/, '')
21
+ tags = tags.split("\n").sort {|a,b| b <=> a}
22
+ puts tags
23
+ end
24
+
25
+ desc 'Create a new tag in the SVN repository'
26
+ task :create_tag => 'svn:prereqs' do |t|
27
+ v = ENV['VERSION'] or abort 'Must supply VERSION=x.y.z'
28
+ abort "Versions don't match #{v} vs #{PROJ.version}" if v != PROJ.version
29
+
30
+ svn = PROJ.svn
31
+ trunk = File.join(svn.root, svn.trunk)
32
+ tag = "%s-%s" % [PROJ.name, PROJ.version]
33
+ tag = File.join(svn.root, svn.tags, tag)
34
+ msg = "Creating tag for #{PROJ.name} version #{PROJ.version}"
35
+
36
+ puts "Creating SVN tag '#{tag}'"
37
+ unless system "svn cp -m '#{msg}' #{trunk} #{tag}"
38
+ abort "Tag creation failed"
39
+ end
40
+ end
41
+
42
+ end # namespace :svn
43
+
44
+ task 'gem:release' => 'svn:create_tag'
45
+
46
+ end # if HAVE_SVN
47
+
48
+ # EOF
data/tasks/test.rake ADDED
@@ -0,0 +1,38 @@
1
+ # $Id$
2
+
3
+ require 'rake/testtask'
4
+
5
+ namespace :test do
6
+
7
+ Rake::TestTask.new(:run) do |t|
8
+ t.libs = PROJ.libs
9
+ t.test_files = if test(?f, PROJ.test.file) then [PROJ.test.file]
10
+ else PROJ.test.files end
11
+ t.ruby_opts += PROJ.ruby_opts
12
+ t.ruby_opts += PROJ.test.opts
13
+ end
14
+
15
+ if HAVE_RCOV
16
+ desc 'Run rcov on the unit tests'
17
+ task :rcov => :clobber_rcov do
18
+ opts = PROJ.rcov.opts.dup << '-o' << PROJ.rcov.dir
19
+ opts = opts.join(' ')
20
+ files = if test(?f, PROJ.test.file) then [PROJ.test.file]
21
+ else PROJ.test.files end
22
+ files = files.join(' ')
23
+ sh "#{RCOV} #{files} #{opts}"
24
+ end
25
+
26
+ task :clobber_rcov do
27
+ rm_r 'coverage' rescue nil
28
+ end
29
+ end
30
+
31
+ end # namespace :test
32
+
33
+ desc 'Alias to test:run'
34
+ task :test => 'test:run'
35
+
36
+ task :clobber => 'test:clobber_rcov' if HAVE_RCOV
37
+
38
+ # EOF
File without changes
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: term-extractor
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Tom Taylor
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-05-29 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0.6"
23
+ version:
24
+ description: Extract terms from text using a selection of web based parsers.
25
+ email: tom@headshift.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files:
31
+ - History.txt
32
+ - README.txt
33
+ files:
34
+ - History.txt
35
+ - Manifest.txt
36
+ - README.txt
37
+ - Rakefile
38
+ - lib/term_extractor.rb
39
+ - lib/term_extractor/exceptions.rb
40
+ - lib/term_extractor/parsers/yahoo.rb
41
+ - spec/assets/content.sample
42
+ - spec/assets/yahoo_valid_response.xml
43
+ - spec/spec_helper.rb
44
+ - spec/term_extractor_spec.rb
45
+ - tasks/ann.rake
46
+ - tasks/bones.rake
47
+ - tasks/gem.rake
48
+ - tasks/manifest.rake
49
+ - tasks/notes.rake
50
+ - tasks/post_load.rake
51
+ - tasks/rdoc.rake
52
+ - tasks/rubyforge.rake
53
+ - tasks/setup.rb
54
+ - tasks/spec.rake
55
+ - tasks/svn.rake
56
+ - tasks/test.rake
57
+ - test/test_term_extractor.rb
58
+ has_rdoc: true
59
+ homepage: http://termextractor.rubyforge.org
60
+ post_install_message:
61
+ rdoc_options:
62
+ - --main
63
+ - README.txt
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: "0"
71
+ version:
72
+ required_rubygems_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ version: "0"
77
+ version:
78
+ requirements: []
79
+
80
+ rubyforge_project: termextractor
81
+ rubygems_version: 1.1.1
82
+ signing_key:
83
+ specification_version: 2
84
+ summary: Extract terms from text using a selection of web based parsers
85
+ test_files:
86
+ - test/test_term_extractor.rb