picolena 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. data/History.txt +11 -0
  2. data/Manifest.txt +16 -4
  3. data/lib/picolena/picolena_generator.rb +0 -1
  4. data/lib/picolena/templates/app/helpers/documents_helper.rb +4 -0
  5. data/lib/picolena/templates/app/models/document.rb +21 -12
  6. data/lib/picolena/templates/app/models/finder.rb +38 -18
  7. data/lib/picolena/templates/app/models/indexer.rb +69 -89
  8. data/lib/picolena/templates/app/models/plain_text_extractor.rb +7 -7
  9. data/lib/picolena/templates/app/models/query.rb +4 -2
  10. data/lib/picolena/templates/app/views/documents/_document.html.haml +1 -0
  11. data/lib/picolena/templates/lang/ui/de.yml +3 -1
  12. data/lib/picolena/templates/lang/ui/en.yml +3 -1
  13. data/lib/picolena/templates/lang/ui/es.yml +3 -1
  14. data/lib/picolena/templates/lang/ui/fr.yml +3 -1
  15. data/lib/picolena/templates/lib/tasks/index.rake +3 -3
  16. data/lib/picolena/templates/public/images/flags/ar.png +0 -0
  17. data/lib/picolena/templates/public/images/flags/be.png +0 -0
  18. data/lib/picolena/templates/public/images/flags/ca.png +0 -0
  19. data/lib/picolena/templates/public/images/flags/de.png +0 -0
  20. data/lib/picolena/templates/public/images/flags/el.png +0 -0
  21. data/lib/picolena/templates/public/images/flags/en.png +0 -0
  22. data/lib/picolena/templates/public/images/flags/es.png +0 -0
  23. data/lib/picolena/templates/public/images/flags/fr.png +0 -0
  24. data/lib/picolena/templates/public/images/flags/ga.png +0 -0
  25. data/lib/picolena/templates/public/images/flags/hr.png +0 -0
  26. data/lib/picolena/templates/public/images/flags/it.png +0 -0
  27. data/lib/picolena/templates/public/images/flags/nl.png +0 -0
  28. data/lib/picolena/templates/public/images/flags/pl.png +0 -0
  29. data/lib/picolena/templates/public/images/flags/pt-br.png +0 -0
  30. data/lib/picolena/templates/public/images/flags/pt-pt.png +0 -0
  31. data/lib/picolena/templates/public/images/flags/readme.txt +9 -0
  32. data/lib/picolena/templates/spec/models/basic_finder_spec.rb +13 -10
  33. data/lib/picolena/templates/spec/models/finder_spec.rb +5 -5
  34. data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb +7 -7
  35. data/lib/picolena/version.rb +1 -1
  36. data/tasks/hack.rake +2 -2
  37. data/website/index.html +1 -1
  38. data.tar.gz.sig +1 -2
  39. metadata +19 -6
  40. metadata.gz.sig +0 -0
  41. data/lib/picolena/templates/app/models/index_reader.rb +0 -54
  42. data/lib/picolena/templates/app/models/index_writer.rb +0 -33
  43. data/lib/picolena/templates/spec/models/index_reader_spec.rb +0 -7
  44. data/lib/picolena/templates/spec/models/index_writer_spec.rb +0 -7
data/History.txt CHANGED
@@ -1,3 +1,14 @@
1
+ == 0.1.5 2008-04-
2
+
3
+ * 1 major enhancement:
4
+ * yet another Indexer & Index rewrite
5
+
6
+ * 1 minor enhancement:
7
+ * flags to indicate found language
8
+
9
+ * bug fixes:
10
+ * No more (or just less?) index lock errors
11
+
1
12
  == 0.1.4 2008-04-23
2
13
  * 1 minor enhancement:
3
14
  * minimal MacOS support
data/Manifest.txt CHANGED
@@ -22,8 +22,6 @@ lib/picolena/templates/app/helpers/application_helper.rb
22
22
  lib/picolena/templates/app/helpers/documents_helper.rb
23
23
  lib/picolena/templates/app/models/document.rb
24
24
  lib/picolena/templates/app/models/finder.rb
25
- lib/picolena/templates/app/models/index_reader.rb
26
- lib/picolena/templates/app/models/index_writer.rb
27
25
  lib/picolena/templates/app/models/indexer.rb
28
26
  lib/picolena/templates/app/models/plain_text_extractor.rb
29
27
  lib/picolena/templates/app/models/query.rb
@@ -75,6 +73,22 @@ lib/picolena/templates/public/favicon.ico
75
73
  lib/picolena/templates/public/help/PicolenaHowTo-de.pdf
76
74
  lib/picolena/templates/public/help/PicolenaHowTo-de.tex
77
75
  lib/picolena/templates/public/images/bg.gif
76
+ lib/picolena/templates/public/images/flags/ar.png
77
+ lib/picolena/templates/public/images/flags/be.png
78
+ lib/picolena/templates/public/images/flags/ca.png
79
+ lib/picolena/templates/public/images/flags/de.png
80
+ lib/picolena/templates/public/images/flags/el.png
81
+ lib/picolena/templates/public/images/flags/en.png
82
+ lib/picolena/templates/public/images/flags/es.png
83
+ lib/picolena/templates/public/images/flags/fr.png
84
+ lib/picolena/templates/public/images/flags/ga.png
85
+ lib/picolena/templates/public/images/flags/hr.png
86
+ lib/picolena/templates/public/images/flags/it.png
87
+ lib/picolena/templates/public/images/flags/nl.png
88
+ lib/picolena/templates/public/images/flags/pl.png
89
+ lib/picolena/templates/public/images/flags/pt-br.png
90
+ lib/picolena/templates/public/images/flags/pt-pt.png
91
+ lib/picolena/templates/public/images/flags/readme.txt
78
92
  lib/picolena/templates/public/images/icons/cad.png
79
93
  lib/picolena/templates/public/images/icons/code.png
80
94
  lib/picolena/templates/public/images/icons/doc.png
@@ -120,8 +134,6 @@ lib/picolena/templates/spec/models/document_spec.rb
120
134
  lib/picolena/templates/spec/models/finder_spec.rb
121
135
  lib/picolena/templates/spec/models/host_indexing_system_spec.rb
122
136
  lib/picolena/templates/spec/models/index_directories_spec.rb
123
- lib/picolena/templates/spec/models/index_reader_spec.rb
124
- lib/picolena/templates/spec/models/index_writer_spec.rb
125
137
  lib/picolena/templates/spec/models/indexer_spec.rb
126
138
  lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
127
139
  lib/picolena/templates/spec/models/query_spec.rb
data/lib/picolena/picolena_generator.rb CHANGED
@@ -163,7 +163,6 @@ EOS
163
163
  spec/test_dirs/indexed/yet_another_dir
164
164
  spec/test_dirs/not_indexed
165
165
  spec/views
166
- spec/views/application
167
166
  tmp/cache
168
167
  tmp/ferret_indexes
169
168
  tmp/pids
data/lib/picolena/templates/app/helpers/documents_helper.rb CHANGED
@@ -37,6 +37,10 @@ module DocumentsHelper
37
37
  }) if document.matching_content
38
38
  end
39
39
 
40
+ def language_icon_for(document)
41
+ (lang=document.language) && image_tag("flags/#{lang}.png")
42
+ end
43
+
40
44
  # Returns icon and filename for any given document.
41
45
  def icon_and_filename_for(document)
42
46
  [icon_for(document.extname),document.filename].join(" ")
data/lib/picolena/templates/app/models/document.rb CHANGED
@@ -71,21 +71,33 @@ class Document
71
71
  # Returns the last modification date before the document got indexed.
72
72
  # Useful to know how old a document is, and to which version the cache corresponds.
73
73
  def date
74
- from_index[:date].sub(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/,'\1-\2-\3 \4:\5:\6')
74
+ from_index[:modified].sub(/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})/,'\1-\2-\3 \4:\5:\6')
75
75
  end
76
76
 
77
77
  def mtime
78
- from_index[:date].to_i
78
+ from_index[:modified].to_i
79
79
  end
80
80
 
81
81
  # Returns language.
82
- def lang
83
- from_index[:lang]
82
+ def language
83
+ from_index[:language]
84
84
  end
85
85
 
86
86
  # Returns the id with which the document is indexed.
87
87
  def index_id
88
- @index_id ||= Document.find_by_complete_path(complete_path).index_id
88
+ @index_id ||= Finder.term_search(:complete_path, complete_path).doc
89
+ end
90
+
91
+ # Fields that are shared between every document.
92
+ def self.default_fields_for(complete_path)
93
+ {
94
+ :complete_path => complete_path,
95
+ :probably_unique_id => complete_path.base26_hash,
96
+ :filename => File.basename(complete_path),
97
+ :basename => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
98
+ :filetype => File.extname(complete_path),
99
+ :modified => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
100
+ }
89
101
  end
90
102
 
91
103
  private
@@ -93,17 +105,14 @@ class Document
93
105
  # Retrieves the document from the index.
94
106
  # Useful to get meta-info about it.
95
107
  def from_index
96
- IndexReader.new[index_id]
108
+ Indexer.index[index_id]
97
109
  end
98
110
 
99
111
  def self.find_by_unique_id(some_id)
100
- Finder.new("probably_unique_id:"<<some_id).matching_document
112
+ doc_id=Finder.term_search(:probably_unique_id, some_id).doc
113
+ new(Indexer.index[doc_id][:complete_path])
101
114
  end
102
-
103
- def self.find_by_complete_path(complete_path)
104
- Finder.new('complete_path:"'<<complete_path<<'"').matching_document
105
- end
106
-
115
+
107
116
  def in_indexed_directory?
108
117
  !indexed_directory.nil?
109
118
  end
data/lib/picolena/templates/app/models/finder.rb CHANGED
@@ -2,18 +2,16 @@ class Finder
2
2
  attr_reader :query
3
3
 
4
4
  def index
5
- # caching index @@index ||=
6
- # causes ferret-0.11.6/lib/ferret/index.rb:768: [BUG] Segmentation fault
7
- IndexReader.new
5
+ @@index ||= Indexer.index
8
6
  end
9
7
 
10
8
  def initialize(raw_query,page=1,results_per_page=Picolena::ResultsPerPage)
11
9
  @query = Query.extract_from(raw_query)
12
10
  @raw_query= raw_query
13
- IndexReader.ensure_existence
11
+ Indexer.ensure_index_existence
14
12
  @per_page=results_per_page
15
13
  @offset=(page.to_i-1)*results_per_page
16
- index.should_have_documents
14
+ index_should_have_documents
17
15
  end
18
16
 
19
17
  def execute!
@@ -31,9 +29,9 @@ class Finder
31
29
  found_doc.score=score
32
30
  found_doc.index_id=index_id
33
31
  @matching_documents<<found_doc
34
- rescue Errno::ENOENT
35
- #"File has been moved/deleted!"
36
- end
32
+ rescue Errno::ENOENT
33
+ #"File has been moved/deleted!"
34
+ end
37
35
  }
38
36
  @executed=true
39
37
  @time_needed=Time.now-start
@@ -60,14 +58,36 @@ class Finder
60
58
  # Returns matching document for any given query only if
61
59
  # exactly one document is found.
62
60
  # Raises otherwise.
63
- def matching_document
64
- case matching_documents.size
65
- when 0
66
- raise IndexError, "No document found"
67
- when 1
68
- matching_documents.first
69
- else
70
- raise IndexError, "More than one document found"
71
- end
72
- end
61
+ def matching_document
62
+ case matching_documents.size
63
+ when 0
64
+ raise IndexError, "No document found"
65
+ when 1
66
+ matching_documents.first
67
+ else
68
+ raise IndexError, "More than one document found"
69
+ end
70
+ end
71
+
72
+ class<<self
73
+ def searcher
74
+ @@searcher ||= Ferret::Search::Searcher.new(Picolena::IndexSavePath)
75
+ end
76
+
77
+ def term_search(field,term)
78
+ query = Ferret::Search::TermQuery.new(field,term)
79
+ searcher.search(query).hits.first
80
+ end
81
+
82
+ def reload!
83
+ @@searcher = nil
84
+ @@index = nil
85
+ end
86
+ end
87
+
88
+ private
89
+
90
+ def index_should_have_documents
91
+ raise IndexError, "no document found" unless index.size > 0
92
+ end
73
93
  end
data/lib/picolena/templates/app/models/indexer.rb CHANGED
@@ -5,54 +5,22 @@ class Indexer
5
5
  @@max_threads_number = 8
6
6
 
7
7
  class << self
8
- def fields_for(complete_path)
9
- {
10
- :complete_path => complete_path,
11
- :probably_unique_id => complete_path.base26_hash,
12
- :file => File.basename(complete_path),
13
- :basename => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
14
- :filetype => File.extname(complete_path),
15
- :date => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
16
- }
17
- end
18
-
19
- def index_every_directory(update=true)
8
+ def index_every_directory(remove_first=false)
9
+ clear! if remove_first
10
+ # Forces Finder.searcher and Finder.index to be reloaded, by removing them from the cache.
11
+ Finder.reload!
20
12
  log :debug => "Indexing every directory"
21
-
22
-
23
13
  start=Time.now
24
- @update = update
25
- reset! unless update
26
-
27
14
  Picolena::IndexedDirectories.each{|dir, alias_dir|
28
15
  index_directory_with_multithreads(dir)
29
16
  }
30
- # FIXME: with those 2 lines,
17
+ log :debug => "Now optimizing index"
31
18
  writer.optimize
32
- writer.close
33
- # launching Indexer.index_every_directory twice in a row
34
- # would raise a SEGFAULT:
35
- # picolena/lib/picolena/templates/app/models/indexer.rb:27: [BUG] Segmentation fault
36
- # ruby 1.8.6 (2007-06-07) [i486-linux]
37
- #
38
- # Aborted (core dumped)
39
- #
40
- # But without those 2 lines, specs don't pass anymore.
41
- #
42
19
  log :debug => "Indexing done in #{Time.now-start} s."
43
20
  end
44
21
 
45
22
  def index_directory_with_multithreads(dir)
46
- # FIXME: Don't know why, but if more than one thread is created while update the index,
47
- # indexer raises:
48
- #
49
- # current thread not owner
50
- # /usr/lib/ruby/1.8/monitor.rb:278:in `mon_check_owner'
51
- # /home/www/picolena/lib/picolena/templates/lib/core_exts.rb:32:in `join'
52
- # ...
53
- #
54
- # So Index creation is multithreaded, Index update is monothreaded.
55
- threads_number = @update ? 1 : @@max_threads_number
23
+ threads_number = @@max_threads_number
56
24
  log :debug => "Indexing #{dir}, #{threads_number} thread(s)"
57
25
 
58
26
  indexing_list=Dir[File.join(dir,"**/*")].select{|filename|
@@ -61,82 +29,94 @@ class Indexer
61
29
 
62
30
  indexing_list_chunks=indexing_list.in_transposed_slices(threads_number)
63
31
 
32
+ # It initializes an IndexWriter before launching multithreaded
33
+ # indexing. Otherwise, two threads could try to instantiate
34
+ # an IndexWriter at the same time, and get a
35
+ # Ferret::Store::Lock::LockError
36
+ writer
37
+
64
38
  indexing_list_chunks.each_with_thread{|chunk|
65
39
  chunk.each{|filename|
66
- add_or_update_file(filename)
40
+ add_file(filename)
67
41
  }
68
42
  }
69
43
  end
70
44
 
71
- def add_or_update_file(complete_path)
72
- should_be_added = true
73
- if @update then
74
- log :debug => "What to do with #{complete_path} ?"
75
- occurences = reader.occurences_number(complete_path)
76
- log :debug => "\tappears #{occurences} times in the index"
77
- case occurences
78
- when 0
79
- #Nothing to do here, the file will be added.
80
- when 1
81
- d=Document.find_by_complete_path(complete_path)
82
- if File.mtime(complete_path).strftime("%Y%m%d%H%M%S").to_i > d.mtime then
83
- log :debug => "\thas been modified"
84
- delete_file(complete_path)
85
- else
86
- should_be_added = false
87
- log :debug => "\thas not been modified. leaving it"
88
- end
89
- else
90
- delete_file(complete_path)
91
- end
92
- end
93
- add_file(complete_path) if should_be_added
94
- end
95
-
96
45
  def add_file(complete_path)
97
- log :debug => "Adding #{complete_path}"
98
- mime_type=File.mime(complete_path)
99
- fields = fields_for(complete_path)
100
-
101
- begin
102
- text, lang = PlainTextExtractor.extract_content_and_language_from(complete_path)
103
- raise "\tempty document #{complete_path}" if text.strip.empty?
104
- fields[:content] = text
105
- log :debug => "language found: #{lang}" if lang
106
- fields[:lang] = lang
46
+ default_fields = Document.default_fields_for(complete_path)
47
+ begin
48
+ document = PlainTextExtractor.extract_content_and_language_from(complete_path)
49
+ raise "empty document #{complete_path}" if document[:content].strip.empty?
50
+ document.merge! default_fields
51
+ log :debug => ["Added : #{complete_path}",document[:language] ? " (#{document[:language]})" : ""].join
107
52
  rescue => e
108
53
  log :debug => "\tindexing without content: #{e.message}"
54
+ document = default_fields
109
55
  end
110
-
111
- writer << fields
56
+ writer << document
112
57
  end
113
58
 
114
- def writer
115
- @@writer ||= IndexWriter.new
59
+ # Ensures writer is closed, and removes every index file for RAILS_ENV.
60
+ def clear!(all=false)
61
+ close
62
+ to_remove=all ? Picolena::IndexesSavePath : Picolena::IndexSavePath
63
+ Dir.glob(File.join(to_remove,'**/*')).each{|f| FileUtils.rm(f) if File.file?(f)}
116
64
  end
117
65
 
118
- def reader
119
- @@reader ||= IndexReader.new
66
+ # Closes the writer and
67
+ # ensures that a new IndexWriter is instantiated next time writer is called.
68
+ def close
69
+ @@writer.close rescue nil
70
+ # Ferret will SEGFAULT otherwise.
71
+ @@writer = nil
120
72
  end
121
73
 
122
- def reset!
123
- log :debug => "Resetting Index"
124
- @@writer=nil
125
- @@reader=nil
126
- IndexWriter.remove
74
+ # Only one IndexWriter should be instantiated.
75
+ # If one already exists, returns it.
76
+ # Creates it otherwise.
77
+ def writer
78
+ @@writer ||= Ferret::Index::IndexWriter.new(default_index_params)
127
79
  end
128
80
 
129
- def delete_file(complete_path)
130
- log :debug => "\tRemoving from index"
131
- reader.delete_by_complete_path(complete_path)
81
+ def index
82
+ Ferret::Index::Index.new(default_index_params)
83
+ end
84
+
85
+ def ensure_index_existence
86
+ index_every_directory(:remove_first) unless index_exists? or RAILS_ENV=="production"
132
87
  end
133
88
 
134
89
  private
135
90
 
91
+ def index_exists?
92
+ index_filename and File.exists?(index_filename)
93
+ end
94
+
95
+ def index_filename
96
+ Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
97
+ end
98
+
136
99
  def log(hash)
137
100
  hash.each{|level,message|
138
101
  IndexerLogger.send(level,message)
139
102
  }
140
- end
103
+ end
104
+
105
+ def default_index_params
106
+ {:path => Picolena::IndexSavePath, :analyzer => Picolena::Analyzer, :field_infos => default_field_infos}
107
+ end
108
+
109
+ def default_field_infos
110
+ returning Ferret::Index::FieldInfos.new do |field_infos|
111
+ field_infos.add_field(:complete_path, :store => :yes, :index => :untokenized)
112
+ field_infos.add_field(:content, :store => :yes, :index => :yes)
113
+ field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
114
+ field_infos.add_field(:filename, :store => :no, :index => :yes, :boost => 1.5)
115
+ field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
116
+ field_infos.add_field(:modified, :store => :yes, :index => :untokenized)
117
+ field_infos.add_field(:probably_unique_id, :store => :no, :index => :yes)
118
+ field_infos.add_field(:language, :store => :yes, :index => :yes)
119
+ end
120
+ end
141
121
  end
142
122
  end
data/lib/picolena/templates/app/models/plain_text_extractor.rb CHANGED
@@ -109,12 +109,12 @@ class PlainTextExtractor
109
109
  # and if probability score is higher than 90%.
110
110
  def extract_content_and_language
111
111
  content=extract_content
112
- return [content, nil] unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
113
- Picolena::UseLanguageRecognition,
114
- # Is a language guesser already installed?
115
- PlainTextExtractor.language_guesser,
116
- # Language recognition is too unreliable for small files.
117
- content.size > 500].all?
112
+ return {:content => content} unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
113
+ Picolena::UseLanguageRecognition,
114
+ # Is a language guesser already installed?
115
+ PlainTextExtractor.language_guesser,
116
+ # Language recognition is too unreliable for small files.
117
+ content.size > 500].all?
118
118
  language=IO.popen(PlainTextExtractor.language_guesser,'w+'){|lang_guesser|
119
119
  lang_guesser.write content
120
120
  lang_guesser.close_write
@@ -125,6 +125,6 @@ class PlainTextExtractor
125
125
  lang unless score<0.9
126
126
  end
127
127
  }
128
- [content,language]
128
+ {:content => content, :language => language}
129
129
  end
130
130
  end
data/lib/picolena/templates/app/models/query.rb CHANGED
@@ -13,9 +13,11 @@ class Query
13
13
  /\b#{:AND.l}\b/=>'AND',
14
14
  /\b#{:OR.l}\b/=>'OR',
15
15
  /\b#{:NOT.l}\b/=>'NOT',
16
+ /(#{:filename.l}):/=>'filename:',
16
17
  /(#{:filetype.l}):/=>'filetype:',
17
18
  /#{:content.l}:/ => 'content:',
18
- /#{:date.l}:/ => 'date:',
19
+ /(#{:modified.l}):/ => 'modified:',
20
+ /(#{:language.l}):/ => 'language:',
19
21
  /\b#{:LIKE.l}\s+(\S+)/=>'\1~'
20
22
  }
21
23
  to_en.inject(raw_query){|mem,non_english_to_english_keyword|
@@ -25,7 +27,7 @@ class Query
25
27
 
26
28
  # Instantiates a QueryParser once, and keeps it in cache.
27
29
  def parser
28
- @@parser ||= Ferret::QueryParser.new(:fields => [:content, :file, :basename, :filetype, :date], :or_default => false, :analyzer=>Picolena::Analyzer)
30
+ @@parser ||= Ferret::QueryParser.new(:fields => [:content, :filename, :basename, :filetype, :modified], :or_default => false, :analyzer=>Picolena::Analyzer)
29
31
  end
30
32
  end
31
33
  end
data/lib/picolena/templates/app/views/documents/_document.html.haml CHANGED
@@ -1,5 +1,6 @@
1
1
  %h2
2
2
  =link_to icon_and_filename_for(document), download_document_path(document.probably_unique_id)
3
+ =language_icon_for(document)
3
4
  %small=number_to_percentage(document.score*100, :precision=>1)
4
5
  =highlight_matching_content(document)
5
6
  %p=link_to_containing_directory(document)
data/lib/picolena/templates/lang/ui/de.yml CHANGED
@@ -19,6 +19,8 @@ NOT: NICHT
19
19
  LIKE: WIE
20
20
 
21
21
  ## Fields
22
+ filename: filename|file|datei
22
23
  filetype: erweiterung|ext
23
24
  content: inhalt
24
- date: jahr|zeit
25
+ modified: jahr|zeit|geändert
26
+ language: lang|sprache
data/lib/picolena/templates/lang/ui/en.yml CHANGED
@@ -19,6 +19,8 @@ NOT: NOT
19
19
  LIKE: LIKE
20
20
 
21
21
  ## Fields
22
+ filename: filename|file
22
23
  filetype: filetype|ext
23
24
  content: content
24
- date: year|date
25
+ modified: year|date|modified
26
+ language: lang|language
data/lib/picolena/templates/lang/ui/es.yml CHANGED
@@ -19,6 +19,8 @@ NOT: "NO"
19
19
  LIKE: COMO
20
20
 
21
21
  ## Fields
22
+ filename: filename|file|archivo
22
23
  filetype: extensión|ext
23
24
  content: contenido
24
- date: fecha|año|anho
25
+ modified: fecha|año|anho|modificado
26
+ language: lang|idioma
data/lib/picolena/templates/lang/ui/fr.yml CHANGED
@@ -19,6 +19,8 @@ NOT: NON
19
19
  LIKE: COMME
20
20
 
21
21
  ## Fields
22
+ filename: filename|file|fichier
22
23
  filetype: extension|ext
23
24
  content: contenu
24
- date: année|date|annee
25
+ modified: année|date|annee|modifie
26
+ language: lang|langue
data/lib/picolena/templates/lib/tasks/index.rake CHANGED
@@ -2,17 +2,17 @@ desc 'Ferret index maintenance tasks'
2
2
  namespace :index do
3
3
  desc 'Clear indexes'
4
4
  task :clear => :environment do
5
- IndexWriter.remove
5
+ Indexer.clear! :all
6
6
  end
7
7
 
8
8
  desc 'Create index'
9
9
  task :create => :environment do
10
- Indexer.index_every_directory(update=false)
10
+ Indexer.index_every_directory(remove_first=true)
11
11
  end
12
12
 
13
13
  desc 'Update index'
14
14
  task :update => :environment do
15
- Indexer.index_every_directory(update=true)
15
+ Indexer.index_every_directory
16
16
  end
17
17
 
18
18
  # Search index with query "some query" :
data/lib/picolena/templates/public/images/flags/readme.txt ADDED
@@ -0,0 +1,9 @@
1
+ Flag icons - http://www.famfamfam.com
2
+
3
+ These icons are public domain, and as such are free for any use (attribution appreciated but not required).
4
+
5
+ Note that these flags are named using the ISO3166-1 alpha-2 country codes where appropriate. A list of codes can be found at http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
6
+
7
+ If you find these icons useful, please donate via paypal to mjames@gmail.com (or click the donate button available at http://www.famfamfam.com/lab/icons/silk)
8
+
9
+ Contact: mjames@gmail.com
data/lib/picolena/templates/spec/models/basic_finder_spec.rb CHANGED
@@ -10,14 +10,14 @@ describe "Finder without index on disk" do
10
10
  end
11
11
 
12
12
  before(:each) do
13
- IndexWriter.remove
13
+ Indexer.clear!
14
14
  end
15
15
 
16
16
  it "should create index" do
17
17
  Picolena::IndexedDirectories.replace({'spec/test_dirs/indexed/just_one_doc'=>'//justonedoc/'})
18
- lambda {@finder_with_new_index=Finder.new("test moi")}.should change(IndexReader, :exists?).from(false).to(true)
18
+ lambda {@finder_with_new_index=Finder.new("test moi")}.should change(Indexer, :index_exists?).from(false).to(true)
19
19
  File.exists?(File.join(@new_index_path,'_0.cfs')).should be_true
20
- IndexReader.new.size.should >0
20
+ Indexer.index.size.should >0
21
21
  end
22
22
 
23
23
  it "should raise if index is still empty after trying to create it" do
@@ -35,16 +35,19 @@ end
35
35
 
36
36
  fields={
37
37
  # description => key
38
- :content=>:content,
39
- :basename=>:basename,
40
- :filename=>:file,
41
- :extension => :filetype,
42
- :modification_time=>:date
38
+ :content => :content,
39
+ :complete_path => :complete_path,
40
+ :basename => :basename,
41
+ :filename => :filename,
42
+ :extension => :filetype,
43
+ :modification_time => :modified,
44
+ :probably_unique_id => :probably_unique_id,
45
+ :language => :language
43
46
  }
44
47
 
45
48
  describe "Basic Finder" do
46
49
  before(:all) do
47
- Indexer.index_every_directory(update=false)
50
+ Indexer.index_every_directory(remove_first=true)
48
51
  end
49
52
 
50
53
  it "should accept one parameter as query, and 2 optionals for paginating" do
@@ -82,7 +85,7 @@ describe "Basic Finder" do
82
85
 
83
86
  fields.each_pair do |description,field_name|
84
87
  it "should index #{description} as :#{field_name}" do
85
- IndexReader.new.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
88
+ Indexer.index.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
86
89
  end
87
90
  end
88
91
 
data/lib/picolena/templates/spec/models/finder_spec.rb CHANGED
@@ -21,7 +21,7 @@ describe Finder do
21
21
  File.utime(0, once_upon_a_time, 'spec/test_dirs/indexed/basic/basic.pdf')
22
22
  File.utime(0, a_bit_later, 'spec/test_dirs/indexed/yet_another_dir/office2003-word-template.dot')
23
23
  File.utime(0, nineties, 'spec/test_dirs/indexed/others/placeholder.txt')
24
- Indexer.index_every_directory(update=false)
24
+ Indexer.index_every_directory(remove_first=true)
25
25
  end
26
26
 
27
27
  it "should find documents according to their basename when specified with basename:query" do
@@ -30,8 +30,8 @@ describe Finder do
30
30
  matching_documents_filename.should include("crossed.text")
31
31
  end
32
32
 
33
- it "should find documents according to their filename when specified with file:query" do
34
- Finder.new("file:crossed.text").matching_documents.collect{|d| d.content}.should include("txt inside!")
33
+ it "should find documents according to their filename when specified with file:query or filename:query" do
34
+ Finder.new("filename:crossed.text").matching_documents.collect{|d| d.content}.should include("txt inside!")
35
35
  Finder.new("file:crossed.txt").matching_documents.collect{|d| d.content}.should include("text inside!")
36
36
  end
37
37
 
@@ -47,9 +47,9 @@ describe Finder do
47
47
  end
48
48
 
49
49
  it "should give a boost to basename, filename and filetype in index" do
50
- index=IndexReader.new
50
+ index=Indexer.index
51
51
  index.field_infos[:basename].boost.should > 1.0
52
- index.field_infos[:file].boost.should > 1.0
52
+ index.field_infos[:filename].boost.should > 1.0
53
53
  index.field_infos[:filetype].boost.should > 1.0
54
54
  end
55
55
 
data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb CHANGED
@@ -2,7 +2,7 @@ require File.dirname(__FILE__) + '/../spec_helper'
2
2
 
3
3
  describe "PlainTextExtractors" do
4
4
  before(:all) do
5
- IndexReader.ensure_existence
5
+ Indexer.ensure_index_existence
6
6
  end
7
7
 
8
8
  PlainTextExtractor.all.each{|extractor|
@@ -29,14 +29,14 @@ describe "PlainTextExtractors" do
29
29
  }
30
30
 
31
31
  it "should guess language when enough content is available" do
32
- Document.new("spec/test_dirs/indexed/lang/goethe").lang.should == "de"
33
- Document.new("spec/test_dirs/indexed/lang/shakespeare").lang.should == "en"
34
- Document.new("spec/test_dirs/indexed/lang/lorca").lang.should == "es"
35
- Document.new("spec/test_dirs/indexed/lang/hugo").lang.should == "fr"
32
+ Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de"
33
+ Document.new("spec/test_dirs/indexed/lang/shakespeare").language.should == "en"
34
+ Document.new("spec/test_dirs/indexed/lang/lorca").language.should == "es"
35
+ Document.new("spec/test_dirs/indexed/lang/hugo").language.should == "fr"
36
36
  end
37
37
 
38
38
  it "should not try to guess language when file is too small" do
39
- Document.new("spec/test_dirs/indexed/basic/hello.rb").lang.should be_empty
40
- Document.new("spec/test_dirs/indexed/README").lang.should be_empty
39
+ Document.new("spec/test_dirs/indexed/basic/hello.rb").language.should be_nil
40
+ Document.new("spec/test_dirs/indexed/README").language.should be_nil
41
41
  end
42
42
  end
data/lib/picolena/version.rb CHANGED
@@ -2,7 +2,7 @@ module Picolena #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 4
5
+ TINY = 5
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/tasks/hack.rake CHANGED
@@ -1,9 +1,9 @@
1
1
  desc 'Create development picolena structure inside lib/picolena/templates'
2
- task :lets_hack do
2
+ task :lets_hack => :clean do
3
3
  picolena_root=File.join(File.dirname(__FILE__),'..')
4
4
  Dir.chdir(picolena_root){
5
5
  # Doesn't overwrite any file, Doesn't create any index, Doesn't launch any spec.
6
- system("ruby bin/picolena lib/picolena/templates/spec/test_dirs/indexed --skip --no-index --no-spec --destination=lib/picolena/templates")
6
+ system("ruby bin/picolena lib/picolena/templates/spec/test_dirs/indexed --no-index --no-spec --destination=lib/picolena/templates")
7
7
  }
8
8
  puts <<-EXPLAIN
9
9
 
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Picolena</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.4</a>
36
+ <a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.5</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;picolena&#8217;</h1>
39
39
 
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picolena
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Duminil
@@ -30,7 +30,7 @@ cert_chain:
30
30
  qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
31
31
  -----END CERTIFICATE-----
32
32
 
33
- date: 2008-04-23 00:00:00 +02:00
33
+ date: 2008-04-25 00:00:00 +02:00
34
34
  default_executable:
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
@@ -107,6 +107,7 @@ extra_rdoc_files:
107
107
  - History.txt
108
108
  - Manifest.txt
109
109
  - README.txt
110
+ - lib/picolena/templates/public/images/flags/readme.txt
110
111
  - lib/picolena/templates/public/robots.txt
111
112
  - lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.txt
112
113
  - lib/picolena/templates/spec/test_dirs/indexed/basic/plain.txt
@@ -156,8 +157,6 @@ files:
156
157
  - lib/picolena/templates/app/helpers/documents_helper.rb
157
158
  - lib/picolena/templates/app/models/document.rb
158
159
  - lib/picolena/templates/app/models/finder.rb
159
- - lib/picolena/templates/app/models/index_reader.rb
160
- - lib/picolena/templates/app/models/index_writer.rb
161
160
  - lib/picolena/templates/app/models/indexer.rb
162
161
  - lib/picolena/templates/app/models/plain_text_extractor.rb
163
162
  - lib/picolena/templates/app/models/query.rb
@@ -209,6 +208,22 @@ files:
209
208
  - lib/picolena/templates/public/help/PicolenaHowTo-de.pdf
210
209
  - lib/picolena/templates/public/help/PicolenaHowTo-de.tex
211
210
  - lib/picolena/templates/public/images/bg.gif
211
+ - lib/picolena/templates/public/images/flags/ar.png
212
+ - lib/picolena/templates/public/images/flags/be.png
213
+ - lib/picolena/templates/public/images/flags/ca.png
214
+ - lib/picolena/templates/public/images/flags/de.png
215
+ - lib/picolena/templates/public/images/flags/el.png
216
+ - lib/picolena/templates/public/images/flags/en.png
217
+ - lib/picolena/templates/public/images/flags/es.png
218
+ - lib/picolena/templates/public/images/flags/fr.png
219
+ - lib/picolena/templates/public/images/flags/ga.png
220
+ - lib/picolena/templates/public/images/flags/hr.png
221
+ - lib/picolena/templates/public/images/flags/it.png
222
+ - lib/picolena/templates/public/images/flags/nl.png
223
+ - lib/picolena/templates/public/images/flags/pl.png
224
+ - lib/picolena/templates/public/images/flags/pt-br.png
225
+ - lib/picolena/templates/public/images/flags/pt-pt.png
226
+ - lib/picolena/templates/public/images/flags/readme.txt
212
227
  - lib/picolena/templates/public/images/icons/cad.png
213
228
  - lib/picolena/templates/public/images/icons/code.png
214
229
  - lib/picolena/templates/public/images/icons/doc.png
@@ -254,8 +269,6 @@ files:
254
269
  - lib/picolena/templates/spec/models/finder_spec.rb
255
270
  - lib/picolena/templates/spec/models/host_indexing_system_spec.rb
256
271
  - lib/picolena/templates/spec/models/index_directories_spec.rb
257
- - lib/picolena/templates/spec/models/index_reader_spec.rb
258
- - lib/picolena/templates/spec/models/index_writer_spec.rb
259
272
  - lib/picolena/templates/spec/models/indexer_spec.rb
260
273
  - lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
261
274
  - lib/picolena/templates/spec/models/query_spec.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,54 +0,0 @@
1
- class IndexReader < Ferret::Index::Index
2
- def initialize(params={})
3
- # Add needed parameters
4
- params.merge!(:path => Picolena::IndexSavePath, :analyzer => Picolena::Analyzer)
5
- # Creates the IndexReader
6
- super(params)
7
- end
8
-
9
- # Returns the number of times a file is present in the index.
10
- # index_reader.doc_freq(field, term) → integer
11
- # Return the number of documents in which the term term appears in the field field.
12
- def occurences_number(complete_path)
13
- # complete_path_query = Ferret::Search::TermQuery.new(:complete_path, complete_path)
14
- search_by_complete_path(complete_path).total_hits
15
- end
16
-
17
- def search_by_complete_path(complete_path)
18
- search('complete_path:"'<<complete_path<<'"')
19
- end
20
-
21
- def delete_by_complete_path(complete_path)
22
- search_by_complete_path(complete_path).hits.each{|hit|
23
- delete(hit.doc)
24
- }
25
- close
26
- end
27
-
28
-
29
- # Validation methods.
30
-
31
- def should_have_documents
32
- raise IndexError, "no document found" unless has_documents?
33
- end
34
-
35
- # Returns true if there's at least one document indexed.
36
- def has_documents?
37
- size>0
38
- end
39
-
40
- class<<self
41
-
42
- def ensure_existence
43
- Indexer.index_every_directory(update=false) unless exists? or RAILS_ENV=="production"
44
- end
45
-
46
- def exists?
47
- filename and File.exists?(filename)
48
- end
49
-
50
- def filename
51
- Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
52
- end
53
- end
54
- end
@@ -1,33 +0,0 @@
1
- class IndexWriter < Ferret::Index::IndexWriter
2
- def initialize(params={})
3
- # Add needed parameters
4
- params.merge!(:create_if_missing => true,
5
- :path => Picolena::IndexSavePath,
6
- :analyzer => Picolena::Analyzer
7
- # huge performance impact?
8
- # :auto_flush => true
9
- )
10
- # Creates the IndexWriter
11
- super(params)
12
- # Add required fields (content, filetype, probably_unique_id, ...)
13
- add_fields!
14
- end
15
-
16
- def self.remove
17
- Dir.glob(File.join(Picolena::IndexSavePath,'*')).each{|f| FileUtils.rm(f) if File.file?(f)}
18
- end
19
-
20
- private
21
- def add_fields!
22
- # No need to re-create any field.
23
- return unless field_infos.fields.empty?
24
- field_infos.add_field(:complete_path, :store => :yes, :index => :yes)
25
- field_infos.add_field(:content, :store => :yes, :index => :yes)
26
- field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
27
- field_infos.add_field(:file, :store => :no, :index => :yes, :boost => 1.5)
28
- field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
29
- field_infos.add_field(:date, :store => :yes, :index => :yes)
30
- field_infos.add_field(:probably_unique_id, :store => :no, :index => :yes)
31
- field_infos.add_field(:lang, :store => :yes, :index => :yes)
32
- end
33
- end
@@ -1,7 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe IndexReader do
4
- before(:each) do
5
- @index_reader = IndexReader.new
6
- end
7
- end
@@ -1,7 +0,0 @@
1
- require File.dirname(__FILE__) + '/../spec_helper'
2
-
3
- describe IndexWriter do
4
- before(:each) do
5
- @index_writer = IndexWriter.new
6
- end
7
- end