rdig 0.3.1 → 0.3.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (6)
  1. data/CHANGES +7 -0
  2. data/README +1 -1
  3. data/lib/rdig.rb +2 -2
  4. data/lib/rdig/index.rb +11 -12
  5. data/rakefile +1 -1
  6. metadata +21 -20
data/CHANGES CHANGED
@@ -1,3 +1,10 @@
1
+ 0.3.2
2
+ - make RDig compatible with Ferret 0.10.x
3
+ - won't work any more with Ferret 0.9.x and before
4
+
5
+ 0.3.1
6
+ - Bug fix release: fixed handling of unparseable URLs
7
+
1
8
  0.3.0
2
9
  - file system crawling
3
10
  - optional url rewriting before indexing, e.g. for linking to results
data/README CHANGED
@@ -5,7 +5,7 @@ to help building a site search for web sites or intranets. Internally,
5
5
  Ferret is used for the full text indexing. After creating a config file
6
6
  for your site, the index can be built with a single call to rdig.
7
7
 
8
- RDig depends on Ferret (>= 0.3.2) and the RubyfulSoup library (>= 1.0.4).
8
+ RDig depends on Ferret (>= 0.10.0) and the RubyfulSoup library (>= 1.0.4).
9
9
 
10
10
  == basic usage
11
11
 
data/lib/rdig.rb CHANGED
@@ -24,7 +24,7 @@
24
24
  #++
25
25
  #
26
26
 
27
- RDIGVERSION = '0.3.1'
27
+ RDIGVERSION = '0.3.2'
28
28
 
29
29
 
30
30
  require 'thread'
@@ -142,7 +142,7 @@ module RDig
142
142
  :create => true,
143
143
  :handle_parse_errors => true,
144
144
  :analyzer => Ferret::Analysis::StandardAnalyzer.new,
145
- :occur_default => Ferret::Search::BooleanClause::Occur::MUST
145
+ :occur_default => :must
146
146
  )
147
147
  )
148
148
  end
data/lib/rdig/index.rb CHANGED
@@ -3,27 +3,26 @@ module RDig
3
3
 
4
4
  # used by the crawler to build the ferret index
5
5
  class Indexer
6
- include MonitorMixin, Ferret::Index, Ferret::Document
6
+ include MonitorMixin
7
7
 
8
8
  def initialize(settings)
9
9
  @config = settings
10
- @index_writer = IndexWriter.new(settings.path,
11
- :create => settings.create,
12
- :analyzer => settings.analyzer)
10
+ @index_writer = Ferret::Index::IndexWriter.new(
11
+ :path => settings.path,
12
+ :create => settings.create,
13
+ :analyzer => settings.analyzer)
13
14
  super() # scary, MonitorMixin won't initialize if we don't call super() here (parens matter)
14
15
  end
15
16
 
16
17
  def add_to_index(document)
17
18
  puts "add to index: #{document.uri.to_s}" if RDig::config.verbose
18
- doc = Ferret::Document::Document.new
19
19
  @config.rewrite_uri.call(document.uri) if @config.rewrite_uri
20
-
21
- doc << Field.new("url", document.uri.to_s,
22
- Field::Store::YES, Field::Index::TOKENIZED)
23
- doc << Field.new("title", document.title,
24
- Field::Store::YES, Field::Index::TOKENIZED)
25
- doc << Field.new("data", document.body,
26
- Field::Store::YES, Field::Index::TOKENIZED)
20
+ # all stored and tokenized, should be ferret defaults
21
+ doc = {
22
+ :url => document.uri.to_s,
23
+ :title => document.title,
24
+ :data => document.body
25
+ }
27
26
  synchronize do
28
27
  @index_writer << doc
29
28
  end
data/rakefile CHANGED
@@ -129,7 +129,7 @@ else
129
129
 
130
130
  #### Dependencies and requirements.
131
131
 
132
- s.add_dependency('ferret', '>= 0.3.2')
132
+ s.add_dependency('ferret', '>= 0.10.0')
133
133
  s.add_dependency('rubyful_soup', '>= 1.0.4')
134
134
  #s.requirements << ""
135
135
 
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
2
+ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: rdig
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.3.1
7
- date: 2006-07-26 00:00:00 +02:00
6
+ version: 0.3.2
7
+ date: 2006-10-09 00:00:00 +02:00
8
8
  summary: Ruby based web site indexing and searching library.
9
9
  require_paths:
10
10
  - lib
@@ -25,49 +25,50 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - Jens Kraemer
30
31
  files:
31
32
  - bin/rdig
32
33
  - lib/rdig
33
- - lib/rdig.rb
34
34
  - lib/htmlentities
35
- - lib/rdig/documents.rb
36
- - lib/rdig/file.rb
37
- - lib/rdig/content_extractors.rb
35
+ - lib/rdig.rb
36
+ - lib/rdig/crawler.rb
38
37
  - lib/rdig/search.rb
39
38
  - lib/rdig/highlight.rb
40
39
  - lib/rdig/index.rb
41
40
  - lib/rdig/url_filters.rb
42
- - lib/rdig/crawler.rb
43
- - lib/htmlentities/htmlentities.rb
44
- - lib/htmlentities/README
41
+ - lib/rdig/content_extractors.rb
42
+ - lib/rdig/documents.rb
43
+ - lib/rdig/file.rb
45
44
  - lib/htmlentities/CHANGES
46
45
  - lib/htmlentities/COPYING
46
+ - lib/htmlentities/README
47
+ - lib/htmlentities/htmlentities.rb
47
48
  - test/unit
48
49
  - test/fixtures
49
50
  - test/test_helper.rb
50
- - test/unit/html_content_extractor_test.rb
51
- - test/unit/url_filters_test.rb
52
- - test/unit/word_content_extractor_test.rb
53
- - test/unit/crawler_fs_test.rb
54
51
  - test/unit/etag_filter_test.rb
52
+ - test/unit/url_filters_test.rb
53
+ - test/unit/html_content_extractor_test.rb
55
54
  - test/unit/pdf_content_extractor_test.rb
55
+ - test/unit/word_content_extractor_test.rb
56
56
  - test/unit/file_document_test.rb
57
- - test/fixtures/pdf
57
+ - test/unit/crawler_fs_test.rb
58
58
  - test/fixtures/html
59
+ - test/fixtures/pdf
59
60
  - test/fixtures/word
60
- - test/fixtures/pdf/simple.pdf
61
61
  - test/fixtures/html/entities.html
62
- - test/fixtures/html/custom_tag_selectors.html
63
62
  - test/fixtures/html/simple.html
63
+ - test/fixtures/html/custom_tag_selectors.html
64
+ - test/fixtures/pdf/simple.pdf
64
65
  - test/fixtures/word/simple.doc
65
66
  - doc/examples
66
67
  - doc/examples/config.rb
67
- - TODO
68
68
  - LICENSE
69
- - README
69
+ - TODO
70
70
  - CHANGES
71
+ - README
71
72
  - install.rb
72
73
  - rakefile
73
74
  test_files: []
@@ -97,7 +98,7 @@ dependencies:
97
98
  requirements:
98
99
  - - ">="
99
100
  - !ruby/object:Gem::Version
100
- version: 0.3.2
101
+ version: 0.10.0
101
102
  version:
102
103
  - !ruby/object:Gem::Dependency
103
104
  name: rubyful_soup