rdig 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -18,7 +18,7 @@ RDig.configuration do |cfg|
18
18
 
19
19
  # this is the path where the index will be stored
20
20
  # caution, existing contents of this directory will be deleted!
21
- cfg.indexer.path = '/path/to/index'
21
+ cfg.index.path = '/path/to/index'
22
22
 
23
23
  ##################################################################
24
24
  # options you might want to set, the given values are the defaults
@@ -255,6 +255,8 @@ module RDig
255
255
  content << ' '
256
256
  end
257
257
  elsif element.string # it's a Tag, and it has some content string
258
+ # skip inline scripts and styles
259
+ return nil if element.name =~ /^(script|style)$/i
258
260
  value = element.string.strip
259
261
  unless value.empty?
260
262
  content << value
data/lib/rdig/crawler.rb CHANGED
@@ -76,6 +76,8 @@ module RDig
76
76
  @documents << doc
77
77
  puts "added url #{url}" if RDig::config.verbose
78
78
  end
79
+ rescue
80
+ nil
79
81
  end
80
82
 
81
83
  end
@@ -32,7 +32,7 @@ module RDig
32
32
  begin
33
33
  @uri = URI.parse(args[:url])
34
34
  rescue URI::InvalidURIError
35
- raise "Cannot create document using invalid URL: #{url}"
35
+ raise "Cannot create document using invalid URL: #{args[:url]}"
36
36
  end
37
37
  end
38
38
 
@@ -118,7 +118,7 @@ module RDig
118
118
  @content = ContentExtractors.process(doc.read, doc.content_type)
119
119
  @status = :success
120
120
  when 404
121
- puts "got 404 for #{url}"
121
+ puts "got 404 for #{@uri}"
122
122
  else
123
123
  puts "don't know what to do with response: #{doc.status.join(' : ')}"
124
124
  end
@@ -89,11 +89,11 @@ module RDig
89
89
  @patterns = []
90
90
  if args.respond_to? :each
91
91
  args.each { |pattern|
92
- # cloning because unsure if regexps are thread safe...
93
- @patterns << pattern.clone
92
+ # cloning because unsure if regexps are thread safe ?
93
+ @patterns << pattern #.clone
94
94
  }
95
95
  else
96
- @patterns << args.clone
96
+ @patterns << args #.clone
97
97
  end
98
98
  end
99
99
  end
data/lib/rdig.rb CHANGED
@@ -24,7 +24,7 @@
24
24
  #++
25
25
  #
26
26
 
27
- RDIGVERSION = '0.3.0'
27
+ RDIGVERSION = '0.3.1'
28
28
 
29
29
 
30
30
  require 'thread'
@@ -228,6 +228,7 @@ module RDig
228
228
 
229
229
  # Run the +rdig+ application.
230
230
  def run
231
+ puts "RDig version #{RDIGVERSION}"
231
232
  handle_options
232
233
  begin
233
234
  load_configfile
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11.15
2
+ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: rdig
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.3.0
7
- date: 2006-04-26 00:00:00 +02:00
6
+ version: 0.3.1
7
+ date: 2006-07-26 00:00:00 +02:00
8
8
  summary: Ruby based web site indexing and searching library.
9
9
  require_paths:
10
10
  - lib
@@ -25,50 +25,49 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
- post_install_message:
29
28
  authors:
30
29
  - Jens Kraemer
31
30
  files:
32
31
  - bin/rdig
33
32
  - lib/rdig
34
- - lib/htmlentities
35
33
  - lib/rdig.rb
36
- - lib/rdig/crawler.rb
34
+ - lib/htmlentities
35
+ - lib/rdig/documents.rb
36
+ - lib/rdig/file.rb
37
+ - lib/rdig/content_extractors.rb
37
38
  - lib/rdig/search.rb
38
39
  - lib/rdig/highlight.rb
39
40
  - lib/rdig/index.rb
40
41
  - lib/rdig/url_filters.rb
41
- - lib/rdig/content_extractors.rb
42
- - lib/rdig/documents.rb
43
- - lib/rdig/file.rb
42
+ - lib/rdig/crawler.rb
43
+ - lib/htmlentities/htmlentities.rb
44
+ - lib/htmlentities/README
44
45
  - lib/htmlentities/CHANGES
45
46
  - lib/htmlentities/COPYING
46
- - lib/htmlentities/README
47
- - lib/htmlentities/htmlentities.rb
48
47
  - test/unit
49
48
  - test/fixtures
50
49
  - test/test_helper.rb
51
- - test/unit/etag_filter_test.rb
52
- - test/unit/url_filters_test.rb
53
50
  - test/unit/html_content_extractor_test.rb
54
- - test/unit/pdf_content_extractor_test.rb
51
+ - test/unit/url_filters_test.rb
55
52
  - test/unit/word_content_extractor_test.rb
56
- - test/unit/file_document_test.rb
57
53
  - test/unit/crawler_fs_test.rb
58
- - test/fixtures/html
54
+ - test/unit/etag_filter_test.rb
55
+ - test/unit/pdf_content_extractor_test.rb
56
+ - test/unit/file_document_test.rb
59
57
  - test/fixtures/pdf
58
+ - test/fixtures/html
60
59
  - test/fixtures/word
60
+ - test/fixtures/pdf/simple.pdf
61
61
  - test/fixtures/html/entities.html
62
- - test/fixtures/html/simple.html
63
62
  - test/fixtures/html/custom_tag_selectors.html
64
- - test/fixtures/pdf/simple.pdf
63
+ - test/fixtures/html/simple.html
65
64
  - test/fixtures/word/simple.doc
66
65
  - doc/examples
67
66
  - doc/examples/config.rb
68
- - LICENSE
69
67
  - TODO
70
- - CHANGES
68
+ - LICENSE
71
69
  - README
70
+ - CHANGES
72
71
  - install.rb
73
72
  - rakefile
74
73
  test_files: []