rdig 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGES +2 -0
- data/LICENSE +20 -0
- data/README +61 -0
- data/TODO +0 -0
- data/bin/rdig +32 -0
- data/doc/examples/config.rb +53 -0
- data/install.rb +89 -0
- data/lib/htmlentities/CHANGES +21 -0
- data/lib/htmlentities/COPYING +7 -0
- data/lib/htmlentities/README +15 -0
- data/lib/htmlentities/htmlentities.rb +281 -0
- data/lib/rdig.rb +243 -0
- data/lib/rdig/content_extractors.rb +145 -0
- data/lib/rdig/crawler.rb +176 -0
- data/lib/rdig/highlight.rb +24 -0
- data/lib/rdig/http_client.rb +22 -0
- data/lib/rdig/index.rb +39 -0
- data/lib/rdig/search.rb +77 -0
- data/lib/rdig/url_filters.rb +171 -0
- data/rakefile +325 -0
- data/test/fixtures/html/custom_tag_selectors.html +25 -0
- data/test/fixtures/html/entities.html +15 -0
- data/test/fixtures/html/simple.html +17 -0
- data/test/test_helper.rb +18 -0
- data/test/unit/etag_filter_test.rb +23 -0
- data/test/unit/html_content_extractor_test.rb +64 -0
- data/test/unit/url_filters_test.rb +96 -0
- metadata +102 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module RDig
|
2
|
+
module Search
|
3
|
+
|
4
|
+
# Beginning of a port of the query term highlighter from Lucene contrib.
# The fragment extraction itself is not implemented yet, so for now no
# fragments are produced for any input.
class Highlighter
  # Reads the analyzer from the ferret section of the RDig configuration.
  def initialize
    @analyzer = RDig.config.ferret.analyzer
  end

  # Returns the texts of the best fragments of +text+ as an Array of Strings.
  # Fragments that are nil or whose score is not greater than 0 are left out.
  #
  # scorer::        currently unused
  # text::          the text to take the fragments from
  # max_fragments:: handed on to get_best_text_fragments. The default is 1.
  def best_fragments(scorer, text, max_fragments = 1)
    token_stream = @analyzer.token_stream('body', text)
    # treat a nil result as "no fragments" instead of failing on nil.each
    fragments = get_best_text_fragments(token_stream, text, max_fragments) || []
    frag_texts = []
    fragments.each { |frag|
      frag_texts << frag.to_s if frag && frag.score > 0
    }
    frag_texts
  end

  # Placeholder for the actual fragment extraction, which has not been
  # ported yet. Returns an empty Array so that callers can iterate safely.
  def get_best_text_fragments(token_stream, text, max_fragments)
    []
  end
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module RDig
|
2
|
+
|
3
|
+
module HttpClient
  # Performs an HTTP GET request for +uri+ and returns the response object
  # handed back by Net::HTTP#get.
  #
  # uri::        the URI to fetch; host, port, path and query are used
  # user_agent:: value of the User-Agent request header
  #
  # Errors raised while connecting or fetching are printed to stdout and
  # nil is returned in that case.
  def do_get(uri, user_agent='RDig crawler')
    # Set up the appropriate http headers
    headers = { "User-Agent" => user_agent }

    # a URI like http://host has an empty path, but the request line
    # needs at least '/'
    request_path = uri.path.to_s
    request_path = '/' if request_path.empty?
    request_path += ('?' + uri.query) if uri.query

    Net::HTTP.start(uri.host, (uri.port || 80)) { |http|
      http.get(request_path, headers)
    }
  rescue => error
    puts error
    nil
  end
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
data/lib/rdig/index.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module RDig
|
2
|
+
module Index
|
3
|
+
|
4
|
+
# Used by the crawler to build the ferret index.
class Indexer
  include MonitorMixin, Ferret::Index, Ferret::Document

  # Opens an index writer.
  # settings:: ferret settings; +path+, +create+ and +analyzer+ are read
  def initialize(settings)
    index_path = settings.path
    writer_options = { :create => settings.create,
                       :analyzer => settings.analyzer }
    @index_writer = IndexWriter.new(index_path, writer_options)
    # original author's note: MonitorMixin won't initialize unless super is
    # called with explicit (empty) parens here
    super()
  end

  # Prints the document's uri, then builds a ferret document holding the
  # stored fields "url" (untokenized), "title" and "data" (both tokenized)
  # and appends it to the index while holding the monitor lock.
  def add_to_index(document)
    puts "add to index: #{document.uri.to_s}"
    doc = Ferret::Document::Document.new
    field_specs = [
      ["url",   document.url,   Field::Index::UNTOKENIZED],
      ["title", document.title, Field::Index::TOKENIZED],
      ["data",  document.body,  Field::Index::TOKENIZED]
    ]
    field_specs.each do |name, value, index_mode|
      doc << Field.new(name, value, Field::Store::YES, index_mode)
    end
    synchronize { @index_writer << doc }
  end
  alias_method :<<, :add_to_index

  # Optimizes and closes the index writer and drops the reference to it.
  def close
    writer = @index_writer
    writer.optimize
    writer.close
    @index_writer = nil
  end
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
data/lib/rdig/search.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
module RDig
|
2
|
+
module Search
|
3
|
+
|
4
|
+
# This class is used to search the index.
# Call RDig::searcher to retrieve an instance ready for use.
class Searcher
  include Ferret::Search

  # the query parser used to parse query strings
  attr_reader :query_parser

  # Takes the ferret section of the rdig configuration as a parameter.
  # The query parser is created and the index searcher is opened right away.
  def initialize(settings)
    @ferret_config = settings
    @query_parser = Ferret::QueryParser.new('*', settings.marshal_dump)
    ferret_searcher
  end

  # Returns the Ferret::Search::IndexSearcher instance used internally.
  # A searcher whose reader is no longer the latest is closed and replaced;
  # whenever a searcher is opened, the query parser's fields are set to the
  # field names found in the index.
  def ferret_searcher
    outdated = @ferret_searcher && !@ferret_searcher.reader.latest?
    if outdated
      # reopen searcher
      @ferret_searcher.close
      @ferret_searcher = nil
    end
    return @ferret_searcher if @ferret_searcher

    @ferret_searcher = IndexSearcher.new(@ferret_config.path)
    @query_parser.fields = @ferret_searcher.reader.get_field_names.to_a
    @ferret_searcher
  end

  # Run a search.
  # +query+ usually will be a user-entered string. See the Ferret query
  # language[http://ferret.davebalmain.com/api/classes/Ferret/QueryParser.html]
  # for more information on queries.
  # A Ferret::Search::Query instance may be given, too.
  #
  # Options are:
  # first_doc:: first document in result list to retrieve (0-based). The default is 0.
  # num_docs:: number of documents to retrieve. The default is 10.
  #
  # Returns a Hash with the total number of hits under :hitcount and, under
  # :list, an Array of Hashes with the keys :score, :title, :url and :extract.
  def search(query, options={})
    query = query_parser.parse(query) if query.is_a?(String)
    puts "Query: #{query}"
    hits = ferret_searcher.search(query, options)
    hitcount = hits.total_hits
    list = []
    hits.each do |doc_id, score|
      doc = ferret_searcher.reader.get_document(doc_id)
      list << { :score   => score,
                :title   => doc['title'],
                :url     => doc['url'],
                :extract => build_extract(doc['data']) }
    end
    { :hitcount => hitcount, :list => list }
  end

  # Returns +data+ unchanged when it is nil or at most 200 characters long,
  # otherwise its leading part (indices 0 through 200).
  def build_extract(data)
    return data unless data && data.length > 200
    data[0..200]
  end

end
|
65
|
+
|
66
|
+
# class SearchResult < OpenStruct
|
67
|
+
# def initialize(doc, score)
|
68
|
+
# self.score = score
|
69
|
+
# self.title = doc[:title]
|
70
|
+
# self.url = doc[:url]
|
71
|
+
# self.extract = doc[:content][0..200]
|
72
|
+
# end
|
73
|
+
# end
|
74
|
+
|
75
|
+
|
76
|
+
end
|
77
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
module RDig
|
2
|
+
|
3
|
+
module UrlFilters
|
4
|
+
|
5
|
+
# An ordered list of url filters that is applied to a document.
class FilterChain
  # Builds the chain from +chain_config+. Each entry may be
  # * a Hash mapping filters to their arguments,
  # * an Array whose first element is the filter and whose remaining
  #   elements are its arguments, or
  # * a filter without arguments.
  # The configuration passed in is not modified.
  def initialize(chain_config)
    @filters = []
    chain_config.each { |filter|
      case filter
      when Hash
        filter.each_pair { |f, args|
          add(f, args)
        }
      when Array
        # split without shift: shifting would strip the filter name from
        # the caller's configuration and break any later chain built from it
        name, *args = filter
        add(name, args)
      else
        add(filter)
      end
    }
  end

  # Adds a filter and its args to the chain.
  # When args is a Symbol, it is treated as a configuration key and looked
  # up in RDig.config.crawler.
  #
  # A Symbol filter names a module function of UrlFilters. A Class filter is
  # instantiated (through +instance+ if the class responds to it and no args
  # are given) and its +apply+ method is used. Anything else is ignored.
  def add(filter, args=nil)
    args = RDig.config.crawler.send(args) if args.is_a? Symbol
    case filter
    when Symbol
      if args.nil?
        @filters << lambda { |document|
          UrlFilters.send(filter, document)
        }
      else
        @filters << lambda { |document|
          UrlFilters.send(filter, document, args)
        }
      end
    when Class
      if args.nil?
        if filter.respond_to?(:instance)
          filter_instance = filter.instance
        else
          filter_instance = filter.new
        end
      else
        filter_instance = filter.new(args)
      end
      @filters << lambda { |document|
        filter_instance.apply(document)
      }
    end
  end

  # Runs the document through all filters in order. Returns nil as soon as
  # a filter returns nil or false, the document otherwise.
  def apply(document)
    @filters.each { |filter|
      return nil unless filter.call(document)
    }
    return document
  end
end
|
62
|
+
|
63
|
+
# Keeps a set of all urls visited during a crawl, to avoid indexing pages
# more than once.
# Implemented as a singleton guarded by a monitor, as it has to be shared
# between all crawler threads.
class VisitedUrlFilter
  include MonitorMixin, Singleton

  def initialize
    @visited_urls = Set.new
    super
  end

  # Returns the document if its url has not been visited yet (and records
  # the url as visited), nil otherwise.
  def apply(document)
    synchronize { document if @visited_urls.add?(document.uri.to_s) }
  end
end
|
82
|
+
|
83
|
+
|
84
|
+
# Base class for url inclusion / exclusion filters.
class UrlPatternFilter
  # Takes an Array of Regexps (anything responding to +each+), a single
  # pattern, or nil to disable the filter. With nil, @patterns stays unset.
  def initialize(args=nil)
    return if args.nil?
    given = args.respond_to?(:each) ? args : [args]
    @patterns = []
    # cloning because unsure if regexps are thread safe...
    given.each { |pattern| @patterns << pattern.clone }
  end
end
|
101
|
+
class UrlExclusionFilter < UrlPatternFilter
  # Returns nil if any of the patterns matches the document's url,
  # the document itself otherwise (also when the filter is disabled).
  def apply(document)
    return document unless @patterns
    excluded = @patterns.any? { |pattern| document.uri.to_s =~ pattern }
    excluded ? nil : document
  end
end
|
112
|
+
class UrlInclusionFilter < UrlPatternFilter
  # Returns the document if any of the patterns matches its url (or if the
  # filter is disabled), nil otherwise.
  def apply(document)
    return document unless @patterns
    included = @patterns.any? { |pattern| document.uri.to_s =~ pattern }
    included ? document : nil
  end
end
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
# Checks the redirect count of the given document.
# Returns nil (taking the document out of the chain) if its number of
# redirections exceeds +max_redirects+, the document otherwise.
def self.maximum_redirect_filter(document, max_redirects)
  document.redirections > max_redirects ? nil : document
end
|
134
|
+
|
135
|
+
# Expands both href="/path/xyz.html" and href="affe.html" to full urls,
# using the document's referring uri as the base.
#
# Returns nil for documents whose uri has a scheme that does not start with
# http, the document otherwise. The document's uri is completed in place;
# the referring uri is never modified.
#
# Only references without a host of their own take host, port and path
# information from the referrer. An empty reference path (e.g. href="?a=b")
# resolves to the referrer's path, any other relative path is resolved
# against the referrer's directory. Dot segments ("../") are not collapsed.
def self.fix_relative_uri(document)
  uri = document.uri
  return nil unless uri.scheme.nil? || uri.scheme =~ /^http/i
  ref = document.referring_uri
  return document unless ref

  # decide before touching the uri: a reference carrying its own host must
  # keep its own port and path
  relative = uri.host.nil?
  uri.scheme = ref.scheme unless uri.scheme
  return document unless relative

  uri.host = ref.host
  uri.port = ref.port unless uri.port || ref.port == ref.default_port

  # reassign instead of appending, so the referrer's path string stays intact
  ref_path = ref.path.to_s
  ref_path = '/' if ref_path.empty?

  if uri.path.nil? || uri.path.empty?
    uri.path = ref_path.dup
  elsif uri.path !~ /^\//
    uri.path = ref_path[0..ref_path.rindex('/')] + uri.path
  end
  return document
end
|
154
|
+
|
155
|
+
# Returns the document if the host of its uri is contained in
# +include_hosts+, nil otherwise.
def self.hostname_filter(document, include_hosts)
  include_hosts.include?(document.uri.host) ? document : nil
end
|
159
|
+
|
160
|
+
# Normalizes the document's uri in place and returns the document:
# the fragment is removed and, if an index document is configured
# (RDig.config.index_document) and the path ends with a slash, the index
# document's name is appended to the path.
def self.normalize_uri(document)
  document.uri.fragment = nil
  # document.uri.query = nil
  # append index document if configured and path ends with a slash
  index_document = RDig.config.index_document
  if index_document && document.uri.path =~ /\/$/
    # assign a new string rather than appending in place: the path string
    # may be shared with other uri objects (e.g. copies made with dup)
    document.uri.path = document.uri.path + index_document
  end
  return document
end
|
169
|
+
|
170
|
+
end
|
171
|
+
end
|
data/rakefile
ADDED
@@ -0,0 +1,325 @@
|
|
1
|
+
# rakefile for RDig.
|
2
|
+
# large parts borrowed from rake's Rakefile
|
3
|
+
|
4
|
+
# RubyGems and the gem packaging task are optional: when they cannot be
# loaded, the packaging section further down is skipped.
begin
  require 'rubygems'
  require 'rake/gempackagetask'
rescue LoadError, StandardError
  # Deliberately not rescuing Exception, so interrupts and exit requests
  # still get through. StandardError is kept in case a rake version ships
  # a stub that raises on load instead of being absent.
  nil
end
|
10
|
+
require 'rake'
|
11
|
+
require 'rake/testtask'
|
12
|
+
require 'rake/rdoctask'
|
13
|
+
require 'rake/packagetask'
|
14
|
+
require 'rake/contrib/rubyforgepublisher'
|
15
|
+
|
16
|
+
# Writes +msg+ (an empty line by default) to standard error.
def announce(msg='')
  STDERR.puts(msg)
end
|
19
|
+
|
20
|
+
|
21
|
+
PKG_NAME = 'rdig'

# Determine the current version of the software by asking the rdig
# executable; falls back to "0.0.0" if its output is not recognized.
version_output = `ruby -Ilib ./bin/rdig --version`
CURRENT_VERSION = (version_output =~ /rdig, version ([0-9.]+)$/) ? $1 : "0.0.0"

# The version to package: REL from the environment, else the current version.
PKG_VERSION = ENV['REL'] || CURRENT_VERSION

SRC_RB = FileList['lib/**/*.rb']

PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

RELEASE_NAME = "REL #{PKG_VERSION}"

RUBY_FORGE_PROJECT = "rdig"
RUBY_FORGE_USER = "jkraemer"

# Everything that goes into the packages, minus svn metadata, backup files
# and editor swap files.
PKG_FILES = FileList[
  "bin/**/*",
  "lib/**/*",
  "test/**/*",
  "doc/**/*",
  "[A-Z]*",
  "install.rb",
  "rakefile"
].exclude(/\.svn|~$|\.swp$/)
|
54
|
+
|
55
|
+
|
56
|
+
desc "Default Task"
task :default => [ :test_all ]

# Test Tasks -------------------------------------------------------------

# short aliases for the test tasks
task :ta => :test_all
task :tf => :test_functional
task :tu => :test_units

# Run all tests
Rake::TestTask.new("test_all") do |t|
  t.test_files = FileList['test/unit/*_test.rb', 'test/functional/*_test.rb']
  t.libs << "test"
  #t.warning = true
  t.verbose = true
end

# Run unit tests
Rake::TestTask.new("test_units") do |t|
  t.test_files = FileList['test/unit/*_test.rb']
  t.libs << "test"
  #t.warning = true
  t.verbose = true
end

# Run functional tests
Rake::TestTask.new("test_functional") do |t|
  t.test_files = FileList['test/functional/*_test.rb']
  t.libs << "test"
  #t.warning = true
  t.verbose = true
end
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
# Generate the RDoc documentation ----------------------------------------

# The task object is kept in +rd+ because the gem specification reads its
# list of rdoc files.
rd = Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'doc/html'
  rdoc.title = "RDig - Ferret based full text search for web sites"
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.options << '--main' << 'README'
  # an alternative template can be selected via the 'template' env variable
  rdoc.template = "#{ENV['template']}.rb" if ENV['template']
  rdoc.rdoc_files.include('README', 'CHANGES', 'LICENSE', 'TODO')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
105
|
+
|
106
|
+
|
107
|
+
# packaging --------------------------------------------------------------
|
108
|
+
|
109
|
+
# ====================================================================
# Create a task that will package the software into distributable
# tar, zip and gem files.

if ! defined?(Gem)
  puts "Package Target requires RubyGEMs"
else
  spec = Gem::Specification.new do |s|

    #### Basic information.

    s.name = 'rdig'
    s.version = PKG_VERSION
    s.summary = "Ruby based web site indexing and searching library."
    s.description = <<-EOF
      RDig provides an HTTP crawler and content extraction utilities
      to help building a site search for web sites or intranets. Internally,
      Ferret is used for the full text indexing. After creating a config file
      for your site, the index can be built with a single call to rdig.
    EOF

    #### Dependencies and requirements.

    s.add_dependency('ferret', '>= 0.3.2')
    s.add_dependency('rubyful_soup', '>= 1.0.4')
    #s.requirements << ""

    #### Which files are to be included in this gem? Everything in
    #### PKG_FILES (which leaves out .svn directories, backup and swap files).

    s.files = PKG_FILES.to_a

    #### Load-time details: library and application (you will need one or both).

    s.require_path = 'lib'         # Use these for libraries.
    s.bindir = "bin"               # Use these for applications.
    s.executables = ["rdig"]
    s.default_executable = "rdig"

    #### Documentation and testing.

    s.has_rdoc = true
    # every rdoc file that is not ruby source (README, CHANGES, ...)
    s.extra_rdoc_files = rd.rdoc_files.reject { |fn| fn =~ /\.rb$/ }.to_a
    # same title as used by the rdoc task of this rakefile
    s.rdoc_options <<
      '--title' << 'RDig - Ferret based full text search for web sites' <<
      '--main' << 'README' <<
      '--line-numbers'

    #### Author and project details.

    s.author = "Jens Kraemer"
    s.email = "jk@jkraemer.net"
    s.homepage = "http://rdig.rubyforge.org/"
    s.rubyforge_project = "rdig"
    # if ENV['CERT_DIR']
    #   s.signing_key = File.join(ENV['CERT_DIR'], 'gem-private_key.pem')
    #   s.cert_chain = [File.join(ENV['CERT_DIR'], 'gem-public_cert.pem')]
    # end
  end

  package_task = Rake::GemPackageTask.new(spec) do |pkg|
    pkg.need_zip = true
    pkg.need_tar = true
  end
end
|
173
|
+
|
174
|
+
|
175
|
+
|
176
|
+
# misc ----------------------------------------------------------------
|
177
|
+
|
178
|
+
# Counts the lines of the given file.
# Returns a two element Array: the total number of lines and the number of
# code lines, i.e. lines that are neither blank nor start with a '#'
# (after optional leading whitespace).
def count_lines(filename)
  total = 0
  code = 0
  open(filename) do |file|
    file.each do |line|
      total += 1
      blank_or_comment = line =~ /^\s*$/ || line =~ /^\s*#/
      code += 1 unless blank_or_comment
    end
  end
  [total, code]
end
|
191
|
+
|
192
|
+
# Prints one row of the line count table to standard output: +lines+ and
# +loc+ right-aligned in columns of width 6, followed by +msg+.
def show_line(msg, lines, loc)
  row = format("%6s %6s %s\n", lines.to_s, loc.to_s, msg)
  print row
end
|
195
|
+
|
196
|
+
# Prints a table with the total number of lines and the number of code
# lines for every file in SRC_RB, followed by the totals.
desc "Count lines of code in the library source files"
task :lines do
  total_lines = 0
  total_code = 0
  show_line("File Name", "LINES", "LOC")
  SRC_RB.each do |fn|
    lines, codelines = count_lines(fn)
    show_line(fn, lines, codelines)
    total_lines += lines
    total_code += codelines
  end
  show_line("TOTAL", total_lines, total_code)
end
|
209
|
+
|
210
|
+
# Define an optional publish target in an external file. If the
# publish.rf file is not found, the publish targets won't be defined.

load("publish.rf") if File.exist?("publish.rf")
|
214
|
+
|
215
|
+
|
216
|
+
# Support Tasks ------------------------------------------------------

desc "Look for TODO and FIXME tags in the code"
task :todo do
  ruby_sources = FileList['**/*.rb'].exclude('pkg')
  ruby_sources.egrep(/#.*(FIXME|TODO|TBD)/)
end

desc "Look for Debugging print lines"
task :dbg do
  FileList['**/*.rb'].egrep(/\bDBG|\bbreakpoint\b/)
end

desc "List all ruby files"
task :rubyfiles do
  # ruby sources outside of pkg, then the executables in bin
  puts Dir['**/*.rb'].select { |fn| fn !~ /^pkg/ }
  puts Dir['bin/*'].select { |fn| fn !~ /CVS|(~$)|(\.rb$)/ }
end
task :rf => :rubyfiles
|
234
|
+
|
235
|
+
|
236
|
+
# --------------------------------------------------------------------
# Creating a release

desc "Make a new release"
task :release => [:prerelease, :clobber, :test_all, :update_version, :package, :tag] do
  # all the work is done by the prerequisites; only report success here
  [
    '',
    "**************************************************************",
    "* Release #{PKG_VERSION} Complete.",
    "* Packages ready to upload.",
    "**************************************************************",
    ''
  ].each { |message| announce message }
end
|
255
|
+
|
256
|
+
# Validate that everything is ready to go for a release.
task :prerelease do
  separator = "**************************************************************"
  announce
  announce separator
  announce "* Making RubyGem Release #{PKG_VERSION}"
  announce "* (current version #{CURRENT_VERSION})"
  announce separator
  announce

  # Is a release number supplied?
  fail "Usage: rake release REL=x.y.z [REUSE=tag_suffix]" unless ENV['REL']

  # Is the release different than the current release.
  # (or is REUSE set?)
  reusing_version = PKG_VERSION == CURRENT_VERSION
  if reusing_version && ! ENV['REUSE']
    fail "Current version is #{PKG_VERSION}, must specify REUSE=tag_suffix to reuse version"
  end

  # Are all source files checked in?
  if ENV['RELTEST']
    announce "Release Task Testing, skipping checked-in file test"
  else
    announce "Checking for unchecked-in files..."
    svn_status = `svn st`
    fail "SVN status is not clean ... do you have unchecked-in files?" unless svn_status =~ /^$/
    announce "No outstanding checkins found ... OK"
  end
end
|
288
|
+
|
289
|
+
# Writes PKG_VERSION into the RDIGVERSION line of lib/rdig.rb and commits
# the file, unless the version is unchanged. With RELTEST set, the commit
# is skipped.
task :update_version => [:prerelease] do
  if PKG_VERSION == CURRENT_VERSION
    announce "No version change ... skipping version update"
    next
  end

  announce "Updating RDig version to #{PKG_VERSION}"
  # write a patched copy first, then move it over the original
  open("lib/rdig.rb") do |source|
    open("lib/rdig.rb.new", "w") do |target|
      source.each do |line|
        is_version_line = line =~ /^RDIGVERSION\s*=\s*/
        target.puts(is_version_line ? "RDIGVERSION = '#{PKG_VERSION}'" : line)
      end
    end
  end
  mv "lib/rdig.rb.new", "lib/rdig.rb"

  if ENV['RELTEST']
    announce "Release Task Testing, skipping commiting of new version"
  else
    sh %{svn commit -m "Updated to version #{PKG_VERSION}" lib/rdig.rb}
  end
end
|
313
|
+
|
314
|
+
desc "Tag all files with the latest release number (REL=x.y.z)"
task :tag => [:prerelease] do
  # tag name: REL_ plus the version (and the REUSE suffix, if given), with
  # dots replaced by underscores
  suffix = ENV['REUSE'] ? ENV['REUSE'].gsub(/\./, '_') : ''
  reltag = "REL_#{PKG_VERSION.gsub(/\./, '_')}#{suffix}"
  announce "Tagging with [#{reltag}]"
  if ENV['RELTEST']
    announce "Release Task Testing, skipping tagging"
  else
    sh %{cd ..; svn copy trunk tags/#{reltag}}
  end
end
|
325
|
+
|