picolena 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/History.txt +11 -0
  2. data/Manifest.txt +16 -4
  3. data/lib/picolena/picolena_generator.rb +0 -1
  4. data/lib/picolena/templates/app/helpers/documents_helper.rb +4 -0
  5. data/lib/picolena/templates/app/models/document.rb +21 -12
  6. data/lib/picolena/templates/app/models/finder.rb +38 -18
  7. data/lib/picolena/templates/app/models/indexer.rb +69 -89
  8. data/lib/picolena/templates/app/models/plain_text_extractor.rb +7 -7
  9. data/lib/picolena/templates/app/models/query.rb +4 -2
  10. data/lib/picolena/templates/app/views/documents/_document.html.haml +1 -0
  11. data/lib/picolena/templates/lang/ui/de.yml +3 -1
  12. data/lib/picolena/templates/lang/ui/en.yml +3 -1
  13. data/lib/picolena/templates/lang/ui/es.yml +3 -1
  14. data/lib/picolena/templates/lang/ui/fr.yml +3 -1
  15. data/lib/picolena/templates/lib/tasks/index.rake +3 -3
  16. data/lib/picolena/templates/public/images/flags/ar.png +0 -0
  17. data/lib/picolena/templates/public/images/flags/be.png +0 -0
  18. data/lib/picolena/templates/public/images/flags/ca.png +0 -0
  19. data/lib/picolena/templates/public/images/flags/de.png +0 -0
  20. data/lib/picolena/templates/public/images/flags/el.png +0 -0
  21. data/lib/picolena/templates/public/images/flags/en.png +0 -0
  22. data/lib/picolena/templates/public/images/flags/es.png +0 -0
  23. data/lib/picolena/templates/public/images/flags/fr.png +0 -0
  24. data/lib/picolena/templates/public/images/flags/ga.png +0 -0
  25. data/lib/picolena/templates/public/images/flags/hr.png +0 -0
  26. data/lib/picolena/templates/public/images/flags/it.png +0 -0
  27. data/lib/picolena/templates/public/images/flags/nl.png +0 -0
  28. data/lib/picolena/templates/public/images/flags/pl.png +0 -0
  29. data/lib/picolena/templates/public/images/flags/pt-br.png +0 -0
  30. data/lib/picolena/templates/public/images/flags/pt-pt.png +0 -0
  31. data/lib/picolena/templates/public/images/flags/readme.txt +9 -0
  32. data/lib/picolena/templates/spec/models/basic_finder_spec.rb +13 -10
  33. data/lib/picolena/templates/spec/models/finder_spec.rb +5 -5
  34. data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb +7 -7
  35. data/lib/picolena/version.rb +1 -1
  36. data/tasks/hack.rake +2 -2
  37. data/website/index.html +1 -1
  38. data.tar.gz.sig +1 -2
  39. metadata +19 -6
  40. metadata.gz.sig +0 -0
  41. data/lib/picolena/templates/app/models/index_reader.rb +0 -54
  42. data/lib/picolena/templates/app/models/index_writer.rb +0 -33
  43. data/lib/picolena/templates/spec/models/index_reader_spec.rb +0 -7
  44. data/lib/picolena/templates/spec/models/index_writer_spec.rb +0 -7
data/History.txt CHANGED
@@ -1,3 +1,14 @@
1
+ == 0.1.5 2008-04-25
2
+
3
+ * 1 major enhancement:
4
+ * yet another Indexer & Index rewrite
5
+
6
+ * 1 minor enhancement:
7
+ * flags to indicate found language
8
+
9
+ * bug fixes:
10
+ * No more (or just fewer?) index lock errors
11
+
1
12
  == 0.1.4 2008-04-23
2
13
  * 1 minor enhancement:
3
14
  * minimal MacOS support
data/Manifest.txt CHANGED
@@ -22,8 +22,6 @@ lib/picolena/templates/app/helpers/application_helper.rb
22
22
  lib/picolena/templates/app/helpers/documents_helper.rb
23
23
  lib/picolena/templates/app/models/document.rb
24
24
  lib/picolena/templates/app/models/finder.rb
25
- lib/picolena/templates/app/models/index_reader.rb
26
- lib/picolena/templates/app/models/index_writer.rb
27
25
  lib/picolena/templates/app/models/indexer.rb
28
26
  lib/picolena/templates/app/models/plain_text_extractor.rb
29
27
  lib/picolena/templates/app/models/query.rb
@@ -75,6 +73,22 @@ lib/picolena/templates/public/favicon.ico
75
73
  lib/picolena/templates/public/help/PicolenaHowTo-de.pdf
76
74
  lib/picolena/templates/public/help/PicolenaHowTo-de.tex
77
75
  lib/picolena/templates/public/images/bg.gif
76
+ lib/picolena/templates/public/images/flags/ar.png
77
+ lib/picolena/templates/public/images/flags/be.png
78
+ lib/picolena/templates/public/images/flags/ca.png
79
+ lib/picolena/templates/public/images/flags/de.png
80
+ lib/picolena/templates/public/images/flags/el.png
81
+ lib/picolena/templates/public/images/flags/en.png
82
+ lib/picolena/templates/public/images/flags/es.png
83
+ lib/picolena/templates/public/images/flags/fr.png
84
+ lib/picolena/templates/public/images/flags/ga.png
85
+ lib/picolena/templates/public/images/flags/hr.png
86
+ lib/picolena/templates/public/images/flags/it.png
87
+ lib/picolena/templates/public/images/flags/nl.png
88
+ lib/picolena/templates/public/images/flags/pl.png
89
+ lib/picolena/templates/public/images/flags/pt-br.png
90
+ lib/picolena/templates/public/images/flags/pt-pt.png
91
+ lib/picolena/templates/public/images/flags/readme.txt
78
92
  lib/picolena/templates/public/images/icons/cad.png
79
93
  lib/picolena/templates/public/images/icons/code.png
80
94
  lib/picolena/templates/public/images/icons/doc.png
@@ -120,8 +134,6 @@ lib/picolena/templates/spec/models/document_spec.rb
120
134
  lib/picolena/templates/spec/models/finder_spec.rb
121
135
  lib/picolena/templates/spec/models/host_indexing_system_spec.rb
122
136
  lib/picolena/templates/spec/models/index_directories_spec.rb
123
- lib/picolena/templates/spec/models/index_reader_spec.rb
124
- lib/picolena/templates/spec/models/index_writer_spec.rb
125
137
  lib/picolena/templates/spec/models/indexer_spec.rb
126
138
  lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
127
139
  lib/picolena/templates/spec/models/query_spec.rb
@@ -163,7 +163,6 @@ EOS
163
163
  spec/test_dirs/indexed/yet_another_dir
164
164
  spec/test_dirs/not_indexed
165
165
  spec/views
166
- spec/views/application
167
166
  tmp/cache
168
167
  tmp/ferret_indexes
169
168
  tmp/pids
@@ -37,6 +37,10 @@ module DocumentsHelper
37
37
  }) if document.matching_content
38
38
  end
39
39
 
40
+ def language_icon_for(document)
41
+ (lang=document.language) && image_tag("flags/#{lang}.png")
42
+ end
43
+
40
44
  # Returns icon and filename for any given document.
41
45
  def icon_and_filename_for(document)
42
46
  [icon_for(document.extname),document.filename].join(" ")
@@ -71,21 +71,33 @@ class Document
71
71
  # Returns the last modification date before the document got indexed.
72
72
  # Useful to know how old a document is, and to which version the cache corresponds.
73
73
  def date
74
- from_index[:date].sub(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/,'\1-\2-\3 \4:\5:\6')
74
+ from_index[:modified].sub(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/,'\1-\2-\3 \4:\5:\6')
75
75
  end
76
76
 
77
77
  def mtime
78
- from_index[:date].to_i
78
+ from_index[:modified].to_i
79
79
  end
80
80
 
81
81
  # Returns language.
82
- def lang
83
- from_index[:lang]
82
+ def language
83
+ from_index[:language]
84
84
  end
85
85
 
86
86
  # Returns the id with which the document is indexed.
87
87
  def index_id
88
- @index_id ||= Document.find_by_complete_path(complete_path).index_id
88
+ @index_id ||= Finder.term_search(:complete_path, complete_path).doc
89
+ end
90
+
91
+ # Fields that are shared between every document.
92
+ def self.default_fields_for(complete_path)
93
+ {
94
+ :complete_path => complete_path,
95
+ :probably_unique_id => complete_path.base26_hash,
96
+ :filename => File.basename(complete_path),
97
+ :basename => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
98
+ :filetype => File.extname(complete_path),
99
+ :modified => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
100
+ }
89
101
  end
90
102
 
91
103
  private
@@ -93,17 +105,14 @@ class Document
93
105
  # Retrieves the document from the index.
94
106
  # Useful to get meta-info about it.
95
107
  def from_index
96
- IndexReader.new[index_id]
108
+ Indexer.index[index_id]
97
109
  end
98
110
 
99
111
  def self.find_by_unique_id(some_id)
100
- Finder.new("probably_unique_id:"<<some_id).matching_document
112
+ doc_id=Finder.term_search(:probably_unique_id, some_id).doc
113
+ new(Indexer.index[doc_id][:complete_path])
101
114
  end
102
-
103
- def self.find_by_complete_path(complete_path)
104
- Finder.new('complete_path:"'<<complete_path<<'"').matching_document
105
- end
106
-
115
+
107
116
  def in_indexed_directory?
108
117
  !indexed_directory.nil?
109
118
  end
@@ -2,18 +2,16 @@ class Finder
2
2
  attr_reader :query
3
3
 
4
4
  def index
5
- # caching index @@index ||=
6
- # causes ferret-0.11.6/lib/ferret/index.rb:768: [BUG] Segmentation fault
7
- IndexReader.new
5
+ @@index ||= Indexer.index
8
6
  end
9
7
 
10
8
  def initialize(raw_query,page=1,results_per_page=Picolena::ResultsPerPage)
11
9
  @query = Query.extract_from(raw_query)
12
10
  @raw_query= raw_query
13
- IndexReader.ensure_existence
11
+ Indexer.ensure_index_existence
14
12
  @per_page=results_per_page
15
13
  @offset=(page.to_i-1)*results_per_page
16
- index.should_have_documents
14
+ index_should_have_documents
17
15
  end
18
16
 
19
17
  def execute!
@@ -31,9 +29,9 @@ class Finder
31
29
  found_doc.score=score
32
30
  found_doc.index_id=index_id
33
31
  @matching_documents<<found_doc
34
- rescue Errno::ENOENT
35
- #"File has been moved/deleted!"
36
- end
32
+ rescue Errno::ENOENT
33
+ #"File has been moved/deleted!"
34
+ end
37
35
  }
38
36
  @executed=true
39
37
  @time_needed=Time.now-start
@@ -60,14 +58,36 @@ class Finder
60
58
  # Returns matching document for any given query only if
61
59
  # exactly one document is found.
62
60
  # Raises otherwise.
63
- def matching_document
64
- case matching_documents.size
65
- when 0
66
- raise IndexError, "No document found"
67
- when 1
68
- matching_documents.first
69
- else
70
- raise IndexError, "More than one document found"
71
- end
72
- end
61
+ def matching_document
62
+ case matching_documents.size
63
+ when 0
64
+ raise IndexError, "No document found"
65
+ when 1
66
+ matching_documents.first
67
+ else
68
+ raise IndexError, "More than one document found"
69
+ end
70
+ end
71
+
72
+ class<<self
73
+ def searcher
74
+ @@searcher ||= Ferret::Search::Searcher.new(Picolena::IndexSavePath)
75
+ end
76
+
77
+ def term_search(field,term)
78
+ query = Ferret::Search::TermQuery.new(field,term)
79
+ searcher.search(query).hits.first
80
+ end
81
+
82
+ def reload!
83
+ @@searcher = nil
84
+ @@index = nil
85
+ end
86
+ end
87
+
88
+ private
89
+
90
+ def index_should_have_documents
91
+ raise IndexError, "no document found" unless index.size > 0
92
+ end
73
93
  end
@@ -5,54 +5,22 @@ class Indexer
5
5
  @@max_threads_number = 8
6
6
 
7
7
  class << self
8
- def fields_for(complete_path)
9
- {
10
- :complete_path => complete_path,
11
- :probably_unique_id => complete_path.base26_hash,
12
- :file => File.basename(complete_path),
13
- :basename => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
14
- :filetype => File.extname(complete_path),
15
- :date => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
16
- }
17
- end
18
-
19
- def index_every_directory(update=true)
8
+ def index_every_directory(remove_first=false)
9
+ clear! if remove_first
10
+ # Forces Finder.searcher and Finder.index to be reloaded, by removing them from the cache.
11
+ Finder.reload!
20
12
  log :debug => "Indexing every directory"
21
-
22
-
23
13
  start=Time.now
24
- @update = update
25
- reset! unless update
26
-
27
14
  Picolena::IndexedDirectories.each{|dir, alias_dir|
28
15
  index_directory_with_multithreads(dir)
29
16
  }
30
- # FIXME: with those 2 lines,
17
+ log :debug => "Now optimizing index"
31
18
  writer.optimize
32
- writer.close
33
- # launching Indexer.index_every_directory twice in a row
34
- # would raise a SEGFAULT:
35
- # picolena/lib/picolena/templates/app/models/indexer.rb:27: [BUG] Segmentation fault
36
- # ruby 1.8.6 (2007-06-07) [i486-linux]
37
- #
38
- # Aborted (core dumped)
39
- #
40
- # But without those 2 lines, specs don't pass anymore.
41
- #
42
19
  log :debug => "Indexing done in #{Time.now-start} s."
43
20
  end
44
21
 
45
22
  def index_directory_with_multithreads(dir)
46
- # FIXME: Don't know why, but if more than one thread is created while update the index,
47
- # indexer raises:
48
- #
49
- # current thread not owner
50
- # /usr/lib/ruby/1.8/monitor.rb:278:in `mon_check_owner'
51
- # /home/www/picolena/lib/picolena/templates/lib/core_exts.rb:32:in `join'
52
- # ...
53
- #
54
- # So Index creation is multithreaded, Index update is monothreaded.
55
- threads_number = @update ? 1 : @@max_threads_number
23
+ threads_number = @@max_threads_number
56
24
  log :debug => "Indexing #{dir}, #{threads_number} thread(s)"
57
25
 
58
26
  indexing_list=Dir[File.join(dir,"**/*")].select{|filename|
@@ -61,82 +29,94 @@ class Indexer
61
29
 
62
30
  indexing_list_chunks=indexing_list.in_transposed_slices(threads_number)
63
31
 
32
+ # It initializes an IndexWriter before launching multithreaded
33
+ # indexing. Otherwise, two threads could try to instantiate
34
+ # an IndexWriter at the same time, and get a
35
+ # Ferret::Store::Lock::LockError
36
+ writer
37
+
64
38
  indexing_list_chunks.each_with_thread{|chunk|
65
39
  chunk.each{|filename|
66
- add_or_update_file(filename)
40
+ add_file(filename)
67
41
  }
68
42
  }
69
43
  end
70
44
 
71
- def add_or_update_file(complete_path)
72
- should_be_added = true
73
- if @update then
74
- log :debug => "What to do with #{complete_path} ?"
75
- occurences = reader.occurences_number(complete_path)
76
- log :debug => "\tappears #{occurences} times in the index"
77
- case occurences
78
- when 0
79
- #Nothing to do here, the file will be added.
80
- when 1
81
- d=Document.find_by_complete_path(complete_path)
82
- if File.mtime(complete_path).strftime("%Y%m%d%H%M%S").to_i > d.mtime then
83
- log :debug => "\thas been modified"
84
- delete_file(complete_path)
85
- else
86
- should_be_added = false
87
- log :debug => "\thas not been modified. leaving it"
88
- end
89
- else
90
- delete_file(complete_path)
91
- end
92
- end
93
- add_file(complete_path) if should_be_added
94
- end
95
-
96
45
  def add_file(complete_path)
97
- log :debug => "Adding #{complete_path}"
98
- mime_type=File.mime(complete_path)
99
- fields = fields_for(complete_path)
100
-
101
- begin
102
- text, lang = PlainTextExtractor.extract_content_and_language_from(complete_path)
103
- raise "\tempty document #{complete_path}" if text.strip.empty?
104
- fields[:content] = text
105
- log :debug => "language found: #{lang}" if lang
106
- fields[:lang] = lang
46
+ default_fields = Document.default_fields_for(complete_path)
47
+ begin
48
+ document = PlainTextExtractor.extract_content_and_language_from(complete_path)
49
+ raise "empty document #{complete_path}" if document[:content].strip.empty?
50
+ document.merge! default_fields
51
+ log :debug => ["Added : #{complete_path}",document[:language] ? " (#{document[:language]})" : ""].join
107
52
  rescue => e
108
53
  log :debug => "\tindexing without content: #{e.message}"
54
+ document = default_fields
109
55
  end
110
-
111
- writer << fields
56
+ writer << document
112
57
  end
113
58
 
114
- def writer
115
- @@writer ||= IndexWriter.new
59
+ # Ensures writer is closed, and removes every index file for RAILS_ENV.
60
+ def clear!(all=false)
61
+ close
62
+ to_remove=all ? Picolena::IndexesSavePath : Picolena::IndexSavePath
63
+ Dir.glob(File.join(to_remove,'**/*')).each{|f| FileUtils.rm(f) if File.file?(f)}
116
64
  end
117
65
 
118
- def reader
119
- @@reader ||= IndexReader.new
66
+ # Closes the writer and
67
+ # ensures that a new IndexWriter is instantiated next time writer is called.
68
+ def close
69
+ @@writer.close rescue nil
70
+ # Ferret will SEGFAULT otherwise.
71
+ @@writer = nil
120
72
  end
121
73
 
122
- def reset!
123
- log :debug => "Resetting Index"
124
- @@writer=nil
125
- @@reader=nil
126
- IndexWriter.remove
74
+ # Only one IndexWriter should be instantiated.
75
+ # If one already exists, returns it.
76
+ # Creates it otherwise.
77
+ def writer
78
+ @@writer ||= Ferret::Index::IndexWriter.new(default_index_params)
127
79
  end
128
80
 
129
- def delete_file(complete_path)
130
- log :debug => "\tRemoving from index"
131
- reader.delete_by_complete_path(complete_path)
81
+ def index
82
+ Ferret::Index::Index.new(default_index_params)
83
+ end
84
+
85
+ def ensure_index_existence
86
+ index_every_directory(:remove_first) unless index_exists? or RAILS_ENV=="production"
132
87
  end
133
88
 
134
89
  private
135
90
 
91
+ def index_exists?
92
+ index_filename and File.exists?(index_filename)
93
+ end
94
+
95
+ def index_filename
96
+ Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
97
+ end
98
+
136
99
  def log(hash)
137
100
  hash.each{|level,message|
138
101
  IndexerLogger.send(level,message)
139
102
  }
140
- end
103
+ end
104
+
105
+ def default_index_params
106
+ {:path => Picolena::IndexSavePath, :analyzer => Picolena::Analyzer, :field_infos => default_field_infos}
107
+ end
108
+
109
+ def default_field_infos
110
+ returning Ferret::Index::FieldInfos.new do |field_infos|
111
+ field_infos.add_field(:complete_path, :store => :yes, :index => :untokenized)
112
+ field_infos.add_field(:content, :store => :yes, :index => :yes)
113
+ field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
114
+ field_infos.add_field(:filename, :store => :no, :index => :yes, :boost => 1.5)
115
+ field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
116
+ field_infos.add_field(:modified, :store => :yes, :index => :untokenized)
117
+ field_infos.add_field(:probably_unique_id, :store => :no, :index => :yes)
118
+ field_infos.add_field(:language, :store => :yes, :index => :yes)
119
+ end
120
+ end
141
121
  end
142
122
  end
@@ -109,12 +109,12 @@ class PlainTextExtractor
109
109
  # and if probability score is higher than 90%.
110
110
  def extract_content_and_language
111
111
  content=extract_content
112
- return [content, nil] unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
113
- Picolena::UseLanguageRecognition,
114
- # Is a language guesser already installed?
115
- PlainTextExtractor.language_guesser,
116
- # Language recognition is too unreliable for small files.
117
- content.size > 500].all?
112
+ return {:content => content} unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
113
+ Picolena::UseLanguageRecognition,
114
+ # Is a language guesser already installed?
115
+ PlainTextExtractor.language_guesser,
116
+ # Language recognition is too unreliable for small files.
117
+ content.size > 500].all?
118
118
  language=IO.popen(PlainTextExtractor.language_guesser,'w+'){|lang_guesser|
119
119
  lang_guesser.write content
120
120
  lang_guesser.close_write
@@ -125,6 +125,6 @@ class PlainTextExtractor
125
125
  lang unless score<0.9
126
126
  end
127
127
  }
128
- [content,language]
128
+ {:content => content, :language => language}
129
129
  end
130
130
  end
@@ -13,9 +13,11 @@ class Query
13
13
  /\b#{:AND.l}\b/=>'AND',
14
14
  /\b#{:OR.l}\b/=>'OR',
15
15
  /\b#{:NOT.l}\b/=>'NOT',
16
+ /(#{:filename.l}):/=>'filename:',
16
17
  /(#{:filetype.l}):/=>'filetype:',
17
18
  /#{:content.l}:/ => 'content:',
18
- /#{:date.l}:/ => 'date:',
19
+ /(#{:modified.l}):/ => 'modified:',
20
+ /(#{:language.l}):/ => 'language:',
19
21
  /\b#{:LIKE.l}\s+(\S+)/=>'\1~'
20
22
  }
21
23
  to_en.inject(raw_query){|mem,non_english_to_english_keyword|
@@ -25,7 +27,7 @@ class Query
25
27
 
26
28
  # Instantiates a QueryParser once, and keeps it in cache.
27
29
  def parser
28
- @@parser ||= Ferret::QueryParser.new(:fields => [:content, :file, :basename, :filetype, :date], :or_default => false, :analyzer=>Picolena::Analyzer)
30
+ @@parser ||= Ferret::QueryParser.new(:fields => [:content, :filename, :basename, :filetype, :modified], :or_default => false, :analyzer=>Picolena::Analyzer)
29
31
  end
30
32
  end
31
33
  end
@@ -1,5 +1,6 @@
1
1
  %h2
2
2
  =link_to icon_and_filename_for(document), download_document_path(document.probably_unique_id)
3
+ =language_icon_for(document)
3
4
  %small=number_to_percentage(document.score*100, :precision=>1)
4
5
  =highlight_matching_content(document)
5
6
  %p=link_to_containing_directory(document)
@@ -19,6 +19,8 @@ NOT: NICHT
19
19
  LIKE: WIE
20
20
 
21
21
  ## Fields
22
+ filename: filename|file|datei
22
23
  filetype: erweiterung|ext
23
24
  content: inhalt
24
- date: jahr|zeit
25
+ modified: jahr|zeit|geändert
26
+ language: lang|sprache
@@ -19,6 +19,8 @@ NOT: NOT
19
19
  LIKE: LIKE
20
20
 
21
21
  ## Fields
22
+ filename: filename|file
22
23
  filetype: filetype|ext
23
24
  content: content
24
- date: year|date
25
+ modified: year|date|modified
26
+ language: lang|language
@@ -19,6 +19,8 @@ NOT: "NO"
19
19
  LIKE: COMO
20
20
 
21
21
  ## Fields
22
+ filename: filename|file|archivo
22
23
  filetype: extensión|ext
23
24
  content: contenido
24
- date: fecha|año|anho
25
+ modified: fecha|año|anho|modificado
26
+ language: lang|idioma
@@ -19,6 +19,8 @@ NOT: NON
19
19
  LIKE: COMME
20
20
 
21
21
  ## Fields
22
+ filename: filename|file|fichier
22
23
  filetype: extension|ext
23
24
  content: contenu
24
- date: année|date|annee
25
+ modified: année|date|annee|modifie
26
+ language: lang|langue
@@ -2,17 +2,17 @@ desc 'Ferret index maintenance tasks'
2
2
  namespace :index do
3
3
  desc 'Clear indexes'
4
4
  task :clear => :environment do
5
- IndexWriter.remove
5
+ Indexer.clear! :all
6
6
  end
7
7
 
8
8
  desc 'Create index'
9
9
  task :create => :environment do
10
- Indexer.index_every_directory(update=false)
10
+ Indexer.index_every_directory(remove_first=true)
11
11
  end
12
12
 
13
13
  desc 'Update index'
14
14
  task :update => :environment do
15
- Indexer.index_every_directory(update=true)
15
+ Indexer.index_every_directory
16
16
  end
17
17
 
18
18
  # Search index with query "some query" :
@@ -0,0 +1,9 @@
1
+ Flag icons - http://www.famfamfam.com
2
+
3
+ These icons are public domain, and as such are free for any use (attribution appreciated but not required).
4
+
5
+ Note that these flags are named using the ISO3166-1 alpha-2 country codes where appropriate. A list of codes can be found at http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
6
+
7
+ If you find these icons useful, please donate via paypal to mjames@gmail.com (or click the donate button available at http://www.famfamfam.com/lab/icons/silk)
8
+
9
+ Contact: mjames@gmail.com
@@ -10,14 +10,14 @@ describe "Finder without index on disk" do
10
10
  end
11
11
 
12
12
  before(:each) do
13
- IndexWriter.remove
13
+ Indexer.clear!
14
14
  end
15
15
 
16
16
  it "should create index" do
17
17
  Picolena::IndexedDirectories.replace({'spec/test_dirs/indexed/just_one_doc'=>'//justonedoc/'})
18
- lambda {@finder_with_new_index=Finder.new("test moi")}.should change(IndexReader, :exists?).from(false).to(true)
18
+ lambda {@finder_with_new_index=Finder.new("test moi")}.should change(Indexer, :index_exists?).from(false).to(true)
19
19
  File.exists?(File.join(@new_index_path,'_0.cfs')).should be_true
20
- IndexReader.new.size.should >0
20
+ Indexer.index.size.should >0
21
21
  end
22
22
 
23
23
  it "should raise if index is still empty after trying to create it" do
@@ -35,16 +35,19 @@ end
35
35
 
36
36
  fields={
37
37
  # description => key
38
- :content=>:content,
39
- :basename=>:basename,
40
- :filename=>:file,
41
- :extension => :filetype,
42
- :modification_time=>:date
38
+ :content => :content,
39
+ :complete_path => :complete_path,
40
+ :basename => :basename,
41
+ :filename => :filename,
42
+ :extension => :filetype,
43
+ :modification_time => :modified,
44
+ :probably_unique_id => :probably_unique_id,
45
+ :language => :language
43
46
  }
44
47
 
45
48
  describe "Basic Finder" do
46
49
  before(:all) do
47
- Indexer.index_every_directory(update=false)
50
+ Indexer.index_every_directory(remove_first=true)
48
51
  end
49
52
 
50
53
  it "should accept one parameter as query, and 2 optionals for paginating" do
@@ -82,7 +85,7 @@ describe "Basic Finder" do
82
85
 
83
86
  fields.each_pair do |description,field_name|
84
87
  it "should index #{description} as :#{field_name}" do
85
- IndexReader.new.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
88
+ Indexer.index.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
86
89
  end
87
90
  end
88
91
 
@@ -21,7 +21,7 @@ describe Finder do
21
21
  File.utime(0, once_upon_a_time, 'spec/test_dirs/indexed/basic/basic.pdf')
22
22
  File.utime(0, a_bit_later, 'spec/test_dirs/indexed/yet_another_dir/office2003-word-template.dot')
23
23
  File.utime(0, nineties, 'spec/test_dirs/indexed/others/placeholder.txt')
24
- Indexer.index_every_directory(update=false)
24
+ Indexer.index_every_directory(remove_first=true)
25
25
  end
26
26
 
27
27
  it "should find documents according to their basename when specified with basename:query" do
@@ -30,8 +30,8 @@ describe Finder do
30
30
  matching_documents_filename.should include("crossed.text")
31
31
  end
32
32
 
33
- it "should find documents according to their filename when specified with file:query" do
34
- Finder.new("file:crossed.text").matching_documents.collect{|d| d.content}.should include("txt inside!")
33
+ it "should find documents according to their filename when specified with file:query or filename:query" do
34
+ Finder.new("filename:crossed.text").matching_documents.collect{|d| d.content}.should include("txt inside!")
35
35
  Finder.new("file:crossed.txt").matching_documents.collect{|d| d.content}.should include("text inside!")
36
36
  end
37
37
 
@@ -47,9 +47,9 @@ describe Finder do
47
47
  end
48
48
 
49
49
  it "should give a boost to basename, filename and filetype in index" do
50
- index=IndexReader.new
50
+ index=Indexer.index
51
51
  index.field_infos[:basename].boost.should > 1.0
52
- index.field_infos[:file].boost.should > 1.0
52
+ index.field_infos[:filename].boost.should > 1.0
53
53
  index.field_infos[:filetype].boost.should > 1.0
54
54
  end
55
55
 
@@ -2,7 +2,7 @@ require File.dirname(__FILE__) + '/../spec_helper'
2
2
 
3
3
  describe "PlainTextExtractors" do
4
4
  before(:all) do
5
- IndexReader.ensure_existence
5
+ Indexer.ensure_index_existence
6
6
  end
7
7
 
8
8
  PlainTextExtractor.all.each{|extractor|
@@ -29,14 +29,14 @@ describe "PlainTextExtractors" do
29
29
  }
30
30
 
31
31
  it "should guess language when enough content is available" do
32
- Document.new("spec/test_dirs/indexed/lang/goethe").lang.should == "de"
33
- Document.new("spec/test_dirs/indexed/lang/shakespeare").lang.should == "en"
34
- Document.new("spec/test_dirs/indexed/lang/lorca").lang.should == "es"
35
- Document.new("spec/test_dirs/indexed/lang/hugo").lang.should == "fr"
32
+ Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de"
33
+ Document.new("spec/test_dirs/indexed/lang/shakespeare").language.should == "en"
34
+ Document.new("spec/test_dirs/indexed/lang/lorca").language.should == "es"
35
+ Document.new("spec/test_dirs/indexed/lang/hugo").language.should == "fr"
36
36
  end
37
37
 
38
38
  it "should not try to guess language when file is too small" do
39
- Document.new("spec/test_dirs/indexed/basic/hello.rb").lang.should be_empty
40
- Document.new("spec/test_dirs/indexed/README").lang.should be_empty
39
+ Document.new("spec/test_dirs/indexed/basic/hello.rb").language.should be_nil
40
+ Document.new("spec/test_dirs/indexed/README").language.should be_nil
41
41
  end
42
42
  end
@@ -2,7 +2,7 @@ module Picolena #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 4
5
+ TINY = 5
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/tasks/hack.rake CHANGED
@@ -1,9 +1,9 @@
1
1
  desc 'Create development picolena structure inside lib/picolena/templates'
2
- task :lets_hack do
2
+ task :lets_hack => :clean do
3
3
  picolena_root=File.join(File.dirname(__FILE__),'..')
4
4
  Dir.chdir(picolena_root){
5
5
  # Doesn't overwrite any file, Doesn't create any index, Doesn't launch any spec.
6
- system("ruby bin/picolena lib/picolena/templates/spec/test_dirs/indexed --skip --no-index --no-spec --destination=lib/picolena/templates")
6
+ system("ruby bin/picolena lib/picolena/templates/spec/test_dirs/indexed --no-index --no-spec --destination=lib/picolena/templates")
7
7
  }
8
8
  puts <<-EXPLAIN
9
9
 
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Picolena</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.4</a>
36
+ <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.5</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;picolena&#8217;</h1>
39
39
 
data.tar.gz.sig CHANGED
@@ -1,2 +1 @@
1
- .��@�:���N6JQp9V"QT�k�7~4*�D��w��u���%v��[��rY���hBt:Cv=�,8ڽ��c���;I��V[$y�nj�ϓέN�3��x+��yCQ^ہ�C(L)�O7�-��2ZVL]���i~��JK"8F�|��:�eT��Vp��ߋU��] ��
2
- ��V���[;#̧KM���$�;=X�~�>���� wYI7��3ksv��A߶� ��0�GZTi7$�����>@
1
+ B 8Ǣ�����ԝ�ŗFAsέ�%l�ѵ�Aw�k>�6�w���|ĝW^9>]���k��i����I٤�e�Z7٭Px���UK��+r�>P��al�<�T+eL@�HD�!�@��X�nV鐎wa<��b臋�g����,q���m�{i��2����#�m�=�܈ϲH'Wmá؝=cm��ݔ�^��㩫҃L=�ˁ�"r L{7�{R'4 �������k����hkx�����=��6�j
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picolena
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Duminil
@@ -30,7 +30,7 @@ cert_chain:
30
30
  qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
31
31
  -----END CERTIFICATE-----
32
32
 
33
- date: 2008-04-23 00:00:00 +02:00
33
+ date: 2008-04-25 00:00:00 +02:00
34
34
  default_executable:
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
@@ -107,6 +107,7 @@ extra_rdoc_files:
107
107
  - History.txt
108
108
  - Manifest.txt
109
109
  - README.txt
110
+ - lib/picolena/templates/public/images/flags/readme.txt
110
111
  - lib/picolena/templates/public/robots.txt
111
112
  - lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.txt
112
113
  - lib/picolena/templates/spec/test_dirs/indexed/basic/plain.txt
@@ -156,8 +157,6 @@ files:
156
157
  - lib/picolena/templates/app/helpers/documents_helper.rb
157
158
  - lib/picolena/templates/app/models/document.rb
158
159
  - lib/picolena/templates/app/models/finder.rb
159
- - lib/picolena/templates/app/models/index_reader.rb
160
- - lib/picolena/templates/app/models/index_writer.rb
161
160
  - lib/picolena/templates/app/models/indexer.rb
162
161
  - lib/picolena/templates/app/models/plain_text_extractor.rb
163
162
  - lib/picolena/templates/app/models/query.rb
@@ -209,6 +208,22 @@ files:
209
208
  - lib/picolena/templates/public/help/PicolenaHowTo-de.pdf
210
209
  - lib/picolena/templates/public/help/PicolenaHowTo-de.tex
211
210
  - lib/picolena/templates/public/images/bg.gif
211
+ - lib/picolena/templates/public/images/flags/ar.png
212
+ - lib/picolena/templates/public/images/flags/be.png
213
+ - lib/picolena/templates/public/images/flags/ca.png
214
+ - lib/picolena/templates/public/images/flags/de.png
215
+ - lib/picolena/templates/public/images/flags/el.png
216
+ - lib/picolena/templates/public/images/flags/en.png
217
+ - lib/picolena/templates/public/images/flags/es.png
218
+ - lib/picolena/templates/public/images/flags/fr.png
219
+ - lib/picolena/templates/public/images/flags/ga.png
220
+ - lib/picolena/templates/public/images/flags/hr.png
221
+ - lib/picolena/templates/public/images/flags/it.png
222
+ - lib/picolena/templates/public/images/flags/nl.png
223
+ - lib/picolena/templates/public/images/flags/pl.png
224
+ - lib/picolena/templates/public/images/flags/pt-br.png
225
+ - lib/picolena/templates/public/images/flags/pt-pt.png
226
+ - lib/picolena/templates/public/images/flags/readme.txt
212
227
  - lib/picolena/templates/public/images/icons/cad.png
213
228
  - lib/picolena/templates/public/images/icons/code.png
214
229
  - lib/picolena/templates/public/images/icons/doc.png
@@ -254,8 +269,6 @@ files:
254
269
  - lib/picolena/templates/spec/models/finder_spec.rb
255
270
  - lib/picolena/templates/spec/models/host_indexing_system_spec.rb
256
271
  - lib/picolena/templates/spec/models/index_directories_spec.rb
257
- - lib/picolena/templates/spec/models/index_reader_spec.rb
258
- - lib/picolena/templates/spec/models/index_writer_spec.rb
259
272
  - lib/picolena/templates/spec/models/indexer_spec.rb
260
273
  - lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
261
274
  - lib/picolena/templates/spec/models/query_spec.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,54 +0,0 @@
1
- class IndexReader < Ferret::Index::Index
2
- def initialize(params={})
3
- # Add needed parameters
4
- params.merge!(:path => Picolena::IndexSavePath, :analyzer => Picolena::Analyzer)
5
- # Creates the IndexReader
6
- super(params)
7
- end
8
-
9
- # Returns the number of times a file is present in the index.
10
- # index_reader.doc_freq(field, term) → integer
11
- # Return the number of documents in which the term term appears in the field field.
12
- def occurences_number(complete_path)
13
- # complete_path_query = Ferret::Search::TermQuery.new(:complete_path, complete_path)
14
- search_by_complete_path(complete_path).total_hits
15
- end
16
-
17
- def search_by_complete_path(complete_path)
18
- search('complete_path:"'<<complete_path<<'"')
19
- end
20
-
21
- def delete_by_complete_path(complete_path)
22
- search_by_complete_path(complete_path).hits.each{|hit|
23
- delete(hit.doc)
24
- }
25
- close
26
- end
27
-
28
-
29
- # Validation methods.
30
-
31
- def should_have_documents
32
- raise IndexError, "no document found" unless has_documents?
33
- end
34
-
35
- # Returns true if there's at least one document indexed.
36
- def has_documents?
37
- size>0
38
- end
39
-
40
- class<<self
41
-
42
- def ensure_existence
43
- Indexer.index_every_directory(update=false) unless exists? or RAILS_ENV=="production"
44
- end
45
-
46
- def exists?
47
- filename and File.exists?(filename)
48
- end
49
-
50
- def filename
51
- Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
52
- end
53
- end
54
- end
@@ -1,33 +0,0 @@
1
- class IndexWriter < Ferret::Index::IndexWriter
2
- def initialize(params={})
3
- # Add needed parameters
4
- params.merge!(:create_if_missing => true,
5
- :path => Picolena::IndexSavePath,
6
- :analyzer => Picolena::Analyzer
7
- # huge performance impact?
8
- # :auto_flush => true
9
- )
10
- # Creates the IndexWriter
11
- super(params)
12
- # Add required fields (content, filetype, probably_unique_id, ...)
13
- add_fields!
14
- end
15
-
16
- def self.remove
17
- Dir.glob(File.join(Picolena::IndexSavePath,'*')).each{|f| FileUtils.rm(f) if File.file?(f)}
18
- end
19
-
20
- private
21
- def add_fields!
22
- # No need to re-create any field.
23
- return unless field_infos.fields.empty?
24
- field_infos.add_field(:complete_path, :store => :yes, :index => :yes)
25
- field_infos.add_field(:content, :store => :yes, :index => :yes)
26
- field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
27
- field_infos.add_field(:file, :store => :no, :index => :yes, :boost => 1.5)
28
- field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
29
- field_infos.add_field(:date, :store => :yes, :index => :yes)
30
- field_infos.add_field(:probably_unique_id, :store => :no, :index => :yes)
31
- field_infos.add_field(:lang, :store => :yes, :index => :yes)
32
- end
33
- end
@@ -1,7 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe IndexReader do
4
- before(:each) do
5
- @index_reader = IndexReader.new
6
- end
7
- end
@@ -1,7 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe IndexWriter do
4
- before(:each) do
5
- @index_writer = IndexWriter.new
6
- end
7
- end