xapian_db 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,35 @@
1
+ require "fileutils"
2
+
3
+ LANGUAGE_MAP = {:danish => :da,
4
+ :dutch => :nl,
5
+ :english => :en,
6
+ :finnish => :fi,
7
+ :french => :fr,
8
+ :german => :de,
9
+ :hungarian => :hu,
10
+ :italian => :it,
11
+ :norwegian => :no,
12
+ :portuguese => :pt,
13
+ :russian => :ru,
14
+ :spanish => :es,
15
+ :swedish => :sv}
16
+
17
+ # 1. Load the stop words files from snowball.tartarus.org
18
+ LANGUAGE_MAP.keys.reject{|k| k == :russian}.each { |l| system("curl http://snowball.tartarus.org/algorithms/%s/stop.txt | iconv -f ISO-8859-1 -t UTF-8 > %s.txt" % [l, l]) }
19
+ system("curl http://snowball.tartarus.org/algorithms/russian/stop.txt | iconv -f KOI8-R -t UTF-8 > russian.txt")
20
+
21
+ # 2. Clean up the files (remove comments) and write a new file with the iso name
22
+ LANGUAGE_MAP.keys.each do |lang|
23
+ open("#{LANGUAGE_MAP[lang]}.txt", "w") do |outfile|
24
+ open("#{lang}.txt", "r") do |infile|
25
+ while line = infile.gets
26
+ outfile.puts line.split(" ", 2).first.downcase.strip unless line =~ /^ +|^$|^\|/
27
+ end
28
+ end
29
+ end
30
+ end
31
+
32
+ # 3. Remove the downloaded files
33
+ LANGUAGE_MAP.keys.each {|lang| FileUtils.rm_rf "#{lang}.txt"}
34
+
35
+
data/lib/xapian_db.rb CHANGED
@@ -1,17 +1,35 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'digest/sha1'
4
- require 'xapian'
5
- require 'yaml'
6
-
7
3
  # This is the top level module of xapian_db. It allows you to
8
4
  # configure XapianDB, create / open databases and perform
9
5
  # searches.
10
6
 
11
7
  # @author Gernot Kogler
12
8
 
9
+ require 'xapian'
10
+ require 'yaml'
11
+
13
12
  module XapianDb
14
13
 
14
+ # Supported languages
15
+ LANGUAGE_MAP = {:da => :danish,
16
+ :nl => :dutch,
17
+ :en => :english,
18
+ :fi => :finnish,
19
+ :fr => :french,
20
+ :de => :german2, # Normalises umlauts and ß
21
+ :hu => :hungarian,
22
+ :it => :italian,
23
+ :nb => :norwegian,
24
+ :nn => :norwegian,
25
+ :no => :norwegian,
26
+ :pt => :portuguese,
27
+ :ro => :romanian,
28
+ :ru => :russian,
29
+ :es => :spanish,
30
+ :sv => :swedish,
31
+ :tr => :turkish}
32
+
15
33
  # Global configuration for XapianDb. See {XapianDb::Config.setup}
16
34
  # for available options
17
35
  def self.setup(&block)
@@ -61,16 +79,9 @@ module XapianDb
61
79
 
62
80
  end
63
81
 
64
- require File.dirname(__FILE__) + '/xapian_db/config'
65
- require File.dirname(__FILE__) + '/xapian_db/adapters/generic_adapter'
66
- require File.dirname(__FILE__) + '/xapian_db/adapters/datamapper_adapter'
67
- require File.dirname(__FILE__) + '/xapian_db/adapters/active_record_adapter'
68
- require File.dirname(__FILE__) + '/xapian_db/index_writers/direct_writer'
69
- require File.dirname(__FILE__) + '/xapian_db/database'
70
- require File.dirname(__FILE__) + '/xapian_db/document_blueprint'
71
- require File.dirname(__FILE__) + '/xapian_db/indexer'
72
- require File.dirname(__FILE__) + '/xapian_db/query_parser'
73
- require File.dirname(__FILE__) + '/xapian_db/resultset'
82
+ do_not_require = %w(update_stopwords.rb railtie.rb)
83
+ files = Dir.glob("#{File.dirname(__FILE__)}/**/*.rb").reject{|path| do_not_require.include?(File.basename(path))}
84
+ files.each {|file| require file}
74
85
 
75
86
  # Configure XapianDB if we are in a Rails app
76
87
  require File.dirname(__FILE__) + '/xapian_db/railtie' if defined?(Rails)
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 3
8
- - 2
9
- version: 0.3.2
8
+ - 3
9
+ version: 0.3.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Gernot Kogler
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-10 00:00:00 +01:00
17
+ date: 2010-12-13 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -66,7 +66,24 @@ files:
66
66
  - lib/xapian_db/indexer.rb
67
67
  - lib/xapian_db/query_parser.rb
68
68
  - lib/xapian_db/railtie.rb
69
+ - lib/xapian_db/repositories/stemmer.rb
70
+ - lib/xapian_db/repositories/stopper.rb
69
71
  - lib/xapian_db/resultset.rb
72
+ - lib/xapian_db/stopwords/da.txt
73
+ - lib/xapian_db/stopwords/de.txt
74
+ - lib/xapian_db/stopwords/en.txt
75
+ - lib/xapian_db/stopwords/es.txt
76
+ - lib/xapian_db/stopwords/fi.txt
77
+ - lib/xapian_db/stopwords/fr.txt
78
+ - lib/xapian_db/stopwords/hu.txt
79
+ - lib/xapian_db/stopwords/it.txt
80
+ - lib/xapian_db/stopwords/nl.txt
81
+ - lib/xapian_db/stopwords/no.txt
82
+ - lib/xapian_db/stopwords/pt.txt
83
+ - lib/xapian_db/stopwords/README
84
+ - lib/xapian_db/stopwords/ru.txt
85
+ - lib/xapian_db/stopwords/sv.txt
86
+ - lib/xapian_db/stopwords/update_stopwords.rb
70
87
  - lib/xapian_db.rb
71
88
  - LICENSE
72
89
  - README.rdoc