rdig 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6)
  1. data/CHANGES +7 -0
  2. data/README +1 -1
  3. data/lib/rdig.rb +2 -2
  4. data/lib/rdig/index.rb +11 -12
  5. data/rakefile +1 -1
  6. metadata +21 -20
data/CHANGES CHANGED
@@ -1,3 +1,10 @@
1
+ 0.3.2
2
+ - make RDig compatible with Ferret 0.10.x
3
+ - won't work any more with Ferret 0.9.x and before
4
+
5
+ 0.3.1
6
+ - Bug fix release: fixed handling of unparseable URLs
7
+
1
8
  0.3.0
2
9
  - file system crawling
3
10
  - optional url rewriting before indexing, e.g. for linking to results
data/README CHANGED
@@ -5,7 +5,7 @@ to help building a site search for web sites or intranets. Internally,
5
5
  Ferret is used for the full text indexing. After creating a config file
6
6
  for your site, the index can be built with a single call to rdig.
7
7
 
8
- RDig depends on Ferret (>= 0.3.2) and the RubyfulSoup library (>= 1.0.4).
8
+ RDig depends on Ferret (>= 0.10.0) and the RubyfulSoup library (>= 1.0.4).
9
9
 
10
10
  == basic usage
11
11
 
data/lib/rdig.rb CHANGED
@@ -24,7 +24,7 @@
24
24
  #++
25
25
  #
26
26
 
27
- RDIGVERSION = '0.3.1'
27
+ RDIGVERSION = '0.3.2'
28
28
 
29
29
 
30
30
  require 'thread'
@@ -142,7 +142,7 @@ module RDig
142
142
  :create => true,
143
143
  :handle_parse_errors => true,
144
144
  :analyzer => Ferret::Analysis::StandardAnalyzer.new,
145
- :occur_default => Ferret::Search::BooleanClause::Occur::MUST
145
+ :occur_default => :must
146
146
  )
147
147
  )
148
148
  end
data/lib/rdig/index.rb CHANGED
@@ -3,27 +3,26 @@ module RDig
3
3
 
4
4
  # used by the crawler to build the ferret index
5
5
  class Indexer
6
- include MonitorMixin, Ferret::Index, Ferret::Document
6
+ include MonitorMixin
7
7
 
8
8
  def initialize(settings)
9
9
  @config = settings
10
- @index_writer = IndexWriter.new(settings.path,
11
- :create => settings.create,
12
- :analyzer => settings.analyzer)
10
+ @index_writer = Ferret::Index::IndexWriter.new(
11
+ :path => settings.path,
12
+ :create => settings.create,
13
+ :analyzer => settings.analyzer)
13
14
  super() # scary, MonitorMixin won't initialize if we don't call super() here (parens matter)
14
15
  end
15
16
 
16
17
  def add_to_index(document)
17
18
  puts "add to index: #{document.uri.to_s}" if RDig::config.verbose
18
- doc = Ferret::Document::Document.new
19
19
  @config.rewrite_uri.call(document.uri) if @config.rewrite_uri
20
-
21
- doc << Field.new("url", document.uri.to_s,
22
- Field::Store::YES, Field::Index::TOKENIZED)
23
- doc << Field.new("title", document.title,
24
- Field::Store::YES, Field::Index::TOKENIZED)
25
- doc << Field.new("data", document.body,
26
- Field::Store::YES, Field::Index::TOKENIZED)
20
+ # all stored and tokenized, should be ferret defaults
21
+ doc = {
22
+ :url => document.uri.to_s,
23
+ :title => document.title,
24
+ :data => document.body
25
+ }
27
26
  synchronize do
28
27
  @index_writer << doc
29
28
  end
data/rakefile CHANGED
@@ -129,7 +129,7 @@ else
129
129
 
130
130
  #### Dependencies and requirements.
131
131
 
132
- s.add_dependency('ferret', '>= 0.3.2')
132
+ s.add_dependency('ferret', '>= 0.10.0')
133
133
  s.add_dependency('rubyful_soup', '>= 1.0.4')
134
134
  #s.requirements << ""
135
135
 
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
2
+ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: rdig
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.3.1
7
- date: 2006-07-26 00:00:00 +02:00
6
+ version: 0.3.2
7
+ date: 2006-10-09 00:00:00 +02:00
8
8
  summary: Ruby based web site indexing and searching library.
9
9
  require_paths:
10
10
  - lib
@@ -25,49 +25,50 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - Jens Kraemer
30
31
  files:
31
32
  - bin/rdig
32
33
  - lib/rdig
33
- - lib/rdig.rb
34
34
  - lib/htmlentities
35
- - lib/rdig/documents.rb
36
- - lib/rdig/file.rb
37
- - lib/rdig/content_extractors.rb
35
+ - lib/rdig.rb
36
+ - lib/rdig/crawler.rb
38
37
  - lib/rdig/search.rb
39
38
  - lib/rdig/highlight.rb
40
39
  - lib/rdig/index.rb
41
40
  - lib/rdig/url_filters.rb
42
- - lib/rdig/crawler.rb
43
- - lib/htmlentities/htmlentities.rb
44
- - lib/htmlentities/README
41
+ - lib/rdig/content_extractors.rb
42
+ - lib/rdig/documents.rb
43
+ - lib/rdig/file.rb
45
44
  - lib/htmlentities/CHANGES
46
45
  - lib/htmlentities/COPYING
46
+ - lib/htmlentities/README
47
+ - lib/htmlentities/htmlentities.rb
47
48
  - test/unit
48
49
  - test/fixtures
49
50
  - test/test_helper.rb
50
- - test/unit/html_content_extractor_test.rb
51
- - test/unit/url_filters_test.rb
52
- - test/unit/word_content_extractor_test.rb
53
- - test/unit/crawler_fs_test.rb
54
51
  - test/unit/etag_filter_test.rb
52
+ - test/unit/url_filters_test.rb
53
+ - test/unit/html_content_extractor_test.rb
55
54
  - test/unit/pdf_content_extractor_test.rb
55
+ - test/unit/word_content_extractor_test.rb
56
56
  - test/unit/file_document_test.rb
57
- - test/fixtures/pdf
57
+ - test/unit/crawler_fs_test.rb
58
58
  - test/fixtures/html
59
+ - test/fixtures/pdf
59
60
  - test/fixtures/word
60
- - test/fixtures/pdf/simple.pdf
61
61
  - test/fixtures/html/entities.html
62
- - test/fixtures/html/custom_tag_selectors.html
63
62
  - test/fixtures/html/simple.html
63
+ - test/fixtures/html/custom_tag_selectors.html
64
+ - test/fixtures/pdf/simple.pdf
64
65
  - test/fixtures/word/simple.doc
65
66
  - doc/examples
66
67
  - doc/examples/config.rb
67
- - TODO
68
68
  - LICENSE
69
- - README
69
+ - TODO
70
70
  - CHANGES
71
+ - README
71
72
  - install.rb
72
73
  - rakefile
73
74
  test_files: []
@@ -97,7 +98,7 @@ dependencies:
97
98
  requirements:
98
99
  - - ">="
99
100
  - !ruby/object:Gem::Version
100
- version: 0.3.2
101
+ version: 0.10.0
101
102
  version:
102
103
  - !ruby/object:Gem::Dependency
103
104
  name: rubyful_soup