picolena 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. data/History.txt +14 -0
  2. data/Manifest.txt +28 -8
  3. data/config/files_to_clean +1 -0
  4. data/config/requirements.rb +1 -1
  5. data/lib/picolena/config/basic.rb +2 -1
  6. data/lib/picolena/config/icons_and_filetypes.yml +5 -0
  7. data/lib/picolena/picolena_generator.rb +3 -1
  8. data/lib/picolena/templates/app/helpers/documents_helper.rb +4 -4
  9. data/lib/picolena/templates/app/models/document.rb +27 -4
  10. data/lib/picolena/templates/app/models/indexer.rb +6 -2
  11. data/lib/picolena/templates/app/models/plain_text_extractor.rb +27 -13
  12. data/lib/picolena/templates/app/models/query.rb +2 -2
  13. data/lib/picolena/templates/app/views/documents/_document.html.haml +1 -1
  14. data/lib/picolena/templates/config/environments/development.rb +2 -0
  15. data/lib/picolena/templates/config/initializers/001_load_ferret.rb +17 -0
  16. data/lib/picolena/templates/config/initializers/{001_load_custom_config.rb → 002_load_custom_config.rb} +1 -2
  17. data/lib/picolena/templates/config/initializers/{002_load_indexed_dirs.rb → 003_load_indexed_dirs.rb} +0 -0
  18. data/lib/picolena/templates/config/initializers/{003_load_white_list_IPs.rb → 004_load_white_list_IPs.rb} +0 -0
  19. data/lib/picolena/templates/config/initializers/{004_load_plain_text_extractors.rb → 005_load_plain_text_extractors.rb} +1 -1
  20. data/lib/picolena/templates/config/initializers/{005_load_custom_title_and_names_and_links.rb → 006_load_custom_title_and_names_and_links.rb} +0 -0
  21. data/lib/picolena/templates/config/initializers/{006_load_icons.rb → 007_load_icons.rb} +0 -0
  22. data/lib/picolena/templates/config/initializers/{007_load_performance_tweaks.rb → 008_load_performance_tweaks.rb} +0 -0
  23. data/lib/picolena/templates/lib/core_exts.rb +52 -0
  24. data/lib/picolena/templates/lib/development_helpers.rb +35 -0
  25. data/lib/picolena/templates/lib/plain_text_extractor_dsl.rb +128 -0
  26. data/lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb +2 -2
  27. data/lib/picolena/templates/lib/plain_text_extractors/adobe.photoshop.rb +12 -0
  28. data/lib/picolena/templates/lib/plain_text_extractors/html.rb +1 -1
  29. data/lib/picolena/templates/lib/plain_text_extractors/ms.excel.rb +4 -4
  30. data/lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb +4 -4
  31. data/lib/picolena/templates/lib/plain_text_extractors/ms.rtf.rb +3 -3
  32. data/lib/picolena/templates/lib/plain_text_extractors/ms.word.rb +4 -4
  33. data/lib/picolena/templates/lib/plain_text_extractors/opendocument.presentation.rb +2 -2
  34. data/lib/picolena/templates/lib/plain_text_extractors/opendocument.spreadsheet.rb +2 -2
  35. data/lib/picolena/templates/lib/plain_text_extractors/opendocument.text.rb +2 -2
  36. data/lib/picolena/templates/lib/plain_text_extractors/pictures.rb +15 -4
  37. data/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb +9 -2
  38. data/lib/picolena/templates/lib/plain_text_extractors/rar.rb +18 -0
  39. data/lib/picolena/templates/lib/plain_text_extractors/videos.rb +13 -0
  40. data/lib/picolena/templates/lib/plain_text_extractors/zip.rb +17 -0
  41. data/lib/picolena/templates/lib/tasks/extract.rake +16 -0
  42. data/lib/picolena/templates/lib/tasks/install_dependencies.rake +1 -1
  43. data/lib/picolena/templates/public/images/thumbnails/NOTE +2 -0
  44. data/lib/picolena/templates/spec/controllers/documents_controller_spec.rb +8 -0
  45. data/lib/picolena/templates/spec/helpers/documents_helper_spec.rb +12 -1
  46. data/lib/picolena/templates/spec/models/basic_finder_spec.rb +6 -4
  47. data/lib/picolena/templates/spec/models/document_spec.rb +24 -4
  48. data/lib/picolena/templates/spec/models/finder_spec.rb +18 -11
  49. data/lib/picolena/templates/spec/models/host_indexing_system_spec.rb +1 -1
  50. data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb +25 -8
  51. data/lib/picolena/templates/spec/models/query_spec.rb +4 -5
  52. data/lib/picolena/templates/spec/spec_helper.rb +9 -0
  53. data/lib/picolena/templates/spec/test_dirs/indexed/archives/dumb_file.rar +0 -0
  54. data/lib/picolena/templates/spec/test_dirs/indexed/archives/some_test_files.zip +0 -0
  55. data/lib/picolena/templates/spec/test_dirs/indexed/basic/fake_thumbnailer +14 -0
  56. data/lib/picolena/templates/spec/test_dirs/indexed/media/badminton.avi +0 -0
  57. data/lib/picolena/templates/spec/test_dirs/indexed/media/caution.tif +0 -0
  58. data/lib/picolena/templates/spec/test_dirs/indexed/media/cygnus.jpeg +0 -0
  59. data/lib/picolena/templates/spec/test_dirs/indexed/media/diceface.eps +79 -0
  60. data/lib/picolena/templates/spec/test_dirs/indexed/media/glass.png +0 -0
  61. data/lib/picolena/templates/spec/test_dirs/indexed/media/gnu.bmp +0 -0
  62. data/lib/picolena/templates/spec/test_dirs/indexed/media/picolena.psd +0 -0
  63. data/lib/picolena/templates/spec/test_dirs/indexed/media/rails_logo_remix.gif +0 -0
  64. data/lib/picolena/templates/spec/test_dirs/indexed/media/warning.tiff +0 -0
  65. data/lib/picolena/version.rb +1 -1
  66. data/website/index.html +1 -1
  67. metadata +31 -32
  68. data.tar.gz.sig +0 -0
  69. data/lib/picolena/templates/lib/plain_text_extractor_DSL.rb +0 -88
  70. metadata.gz.sig +0 -0
@@ -1,3 +1,17 @@
1
+ == 0.2.2 2009-02-13
2
+
3
+ * 3 major enhancements :
4
+ * Thumbnails created for pictures & videos
5
+ * Support for .zip & .rar archives
6
+ * Alias_path is now LetterTokenized
7
+
8
+ * 2 minor enhancements:
9
+ * More specs
10
+ * Some aesthetical changes
11
+
12
+ * 1 bug fix :
13
+ * Displaying filetypes without any icon would raise an Exception
14
+
1
15
  == 0.2.0 2009-02-02
2
16
 
3
17
  * 1 major enhancement :
@@ -37,22 +37,25 @@ lib/picolena/templates/config/boot.rb
37
37
  lib/picolena/templates/config/environments/development.rb
38
38
  lib/picolena/templates/config/environments/production.rb
39
39
  lib/picolena/templates/config/environments/test.rb
40
- lib/picolena/templates/config/initializers/001_load_custom_config.rb
41
- lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb
42
- lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
43
- lib/picolena/templates/config/initializers/004_load_plain_text_extractors.rb
44
- lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
45
- lib/picolena/templates/config/initializers/006_load_icons.rb
46
- lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb
40
+ lib/picolena/templates/config/initializers/001_load_ferret.rb
41
+ lib/picolena/templates/config/initializers/002_load_custom_config.rb
42
+ lib/picolena/templates/config/initializers/003_load_indexed_dirs.rb
43
+ lib/picolena/templates/config/initializers/004_load_white_list_IPs.rb
44
+ lib/picolena/templates/config/initializers/005_load_plain_text_extractors.rb
45
+ lib/picolena/templates/config/initializers/006_load_custom_title_and_names_and_links.rb
46
+ lib/picolena/templates/config/initializers/007_load_icons.rb
47
+ lib/picolena/templates/config/initializers/008_load_performance_tweaks.rb
47
48
  lib/picolena/templates/config/routes.rb
48
49
  lib/picolena/templates/lang/ui/de.yml
49
50
  lib/picolena/templates/lang/ui/en.yml
50
51
  lib/picolena/templates/lang/ui/es.yml
51
52
  lib/picolena/templates/lang/ui/fr.yml
52
53
  lib/picolena/templates/lib/core_exts.rb
54
+ lib/picolena/templates/lib/development_helpers.rb
53
55
  lib/picolena/templates/lib/indexer_logger.rb
54
- lib/picolena/templates/lib/plain_text_extractor_DSL.rb
56
+ lib/picolena/templates/lib/plain_text_extractor_dsl.rb
55
57
  lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb
58
+ lib/picolena/templates/lib/plain_text_extractors/adobe.photoshop.rb
56
59
  lib/picolena/templates/lib/plain_text_extractors/html.rb
57
60
  lib/picolena/templates/lib/plain_text_extractors/ms.excel.rb
58
61
  lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb
@@ -63,7 +66,11 @@ lib/picolena/templates/lib/plain_text_extractors/opendocument.spreadsheet.rb
63
66
  lib/picolena/templates/lib/plain_text_extractors/opendocument.text.rb
64
67
  lib/picolena/templates/lib/plain_text_extractors/pictures.rb
65
68
  lib/picolena/templates/lib/plain_text_extractors/plain_text.rb
69
+ lib/picolena/templates/lib/plain_text_extractors/rar.rb
70
+ lib/picolena/templates/lib/plain_text_extractors/videos.rb
71
+ lib/picolena/templates/lib/plain_text_extractors/zip.rb
66
72
  lib/picolena/templates/lib/tasks/annotations.rake
73
+ lib/picolena/templates/lib/tasks/extract.rake
67
74
  lib/picolena/templates/lib/tasks/index.rake
68
75
  lib/picolena/templates/lib/tasks/install_dependencies.rake
69
76
  lib/picolena/templates/lib/tasks/log.rake
@@ -112,6 +119,7 @@ lib/picolena/templates/public/images/icons/txt.png
112
119
  lib/picolena/templates/public/images/icons/video.png
113
120
  lib/picolena/templates/public/images/icons/xls.png
114
121
  lib/picolena/templates/public/images/main_img.jpg
122
+ lib/picolena/templates/public/images/thumbnails/NOTE
115
123
  lib/picolena/templates/public/images/zafh_net.png
116
124
  lib/picolena/templates/public/robots.txt
117
125
  lib/picolena/templates/public/stylesheets/style.css
@@ -147,12 +155,15 @@ lib/picolena/templates/spec/rcov.opts
147
155
  lib/picolena/templates/spec/spec.opts
148
156
  lib/picolena/templates/spec/spec_helper.rb
149
157
  lib/picolena/templates/spec/test_dirs/indexed/README
158
+ lib/picolena/templates/spec/test_dirs/indexed/archives/dumb_file.rar
159
+ lib/picolena/templates/spec/test_dirs/indexed/archives/some_test_files.zip
150
160
  lib/picolena/templates/spec/test_dirs/indexed/basic/another_plain.text
151
161
  lib/picolena/templates/spec/test_dirs/indexed/basic/basic.odt
152
162
  lib/picolena/templates/spec/test_dirs/indexed/basic/basic.pdf
153
163
  lib/picolena/templates/spec/test_dirs/indexed/basic/basic.tex
154
164
  lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.text
155
165
  lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.txt
166
+ lib/picolena/templates/spec/test_dirs/indexed/basic/fake_thumbnailer
156
167
  lib/picolena/templates/spec/test_dirs/indexed/basic/hello.rb
157
168
  lib/picolena/templates/spec/test_dirs/indexed/basic/myfirstjavaprog.java
158
169
  lib/picolena/templates/spec/test_dirs/indexed/basic/one_page.ppt
@@ -173,7 +184,16 @@ lib/picolena/templates/spec/test_dirs/indexed/lang/lorca
173
184
  lib/picolena/templates/spec/test_dirs/indexed/lang/shakespeare
174
185
  lib/picolena/templates/spec/test_dirs/indexed/literature/Simulation of district heating systems for evaluation of real-time control strategies.pdf
175
186
  lib/picolena/templates/spec/test_dirs/indexed/literature/Types of malfunction in DH substations.doc
187
+ lib/picolena/templates/spec/test_dirs/indexed/media/badminton.avi
188
+ lib/picolena/templates/spec/test_dirs/indexed/media/caution.tif
176
189
  lib/picolena/templates/spec/test_dirs/indexed/media/crow.jpg
190
+ lib/picolena/templates/spec/test_dirs/indexed/media/cygnus.jpeg
191
+ lib/picolena/templates/spec/test_dirs/indexed/media/diceface.eps
192
+ lib/picolena/templates/spec/test_dirs/indexed/media/glass.png
193
+ lib/picolena/templates/spec/test_dirs/indexed/media/gnu.bmp
194
+ lib/picolena/templates/spec/test_dirs/indexed/media/picolena.psd
195
+ lib/picolena/templates/spec/test_dirs/indexed/media/rails_logo_remix.gif
196
+ lib/picolena/templates/spec/test_dirs/indexed/media/warning.tiff
177
197
  lib/picolena/templates/spec/test_dirs/indexed/others/'weird'filename.txt
178
198
  lib/picolena/templates/spec/test_dirs/indexed/others/7.html
179
199
  lib/picolena/templates/spec/test_dirs/indexed/others/BIN_FILE_WITHOUT_EXTENSION
@@ -13,3 +13,4 @@ lib/picolena/templates/tmp
13
13
  lib/picolena/templates/vendor
14
14
  lib/picolena/templates/coverage
15
15
  lib/picolena/templates/doc
16
+ lib/picolena/templates/public/images/thumbnails/*.jpg
@@ -2,7 +2,7 @@ require 'fileutils'
2
2
  include FileUtils
3
3
 
4
4
  require 'rubygems'
5
- %w[rake hoe newgem rubigen].each do |req_gem|
5
+ %w[rake newgem rubigen].each do |req_gem|
6
6
  begin
7
7
  require req_gem
8
8
  rescue LoadError
@@ -46,5 +46,6 @@ module Picolena
46
46
  # PerFieldAnalyzer is used to prevent queries like "language:it" from being broken by StopFilter.
47
47
  per_field_analyzer=Ferret::Analysis::PerFieldAnalyzer.new(Ferret::Analysis::StandardAnalyzer.new)
48
48
  per_field_analyzer[:language]=Ferret::Analysis::WhiteSpaceAnalyzer.new
49
+ per_field_analyzer[:alias_path]=Ferret::Analysis::LetterAnalyzerWithStopFilter.new
49
50
  Analyzer=per_field_analyzer
50
- end
51
+ end
@@ -3,6 +3,7 @@ video:
3
3
  wmv
4
4
  mpg
5
5
  mpeg
6
+ mov
6
7
  ogg:
7
8
  mp3
8
9
  ogg
@@ -17,6 +18,10 @@ txt:
17
18
  bib
18
19
  log
19
20
  ini
21
+ sub
22
+ srt
23
+ vcf
24
+ vcard
20
25
  no_extension
21
26
  doc:
22
27
  doc
@@ -87,7 +87,7 @@ class PicolenaGenerator < RubiGen::Base #:nodoc:
87
87
  m.rake 'spec' unless options[:no_spec]
88
88
 
89
89
  # Cleaning up temp folder if --spec-only
90
- m.clean if false
90
+ m.clean if options[:spec_only]
91
91
  end
92
92
  end
93
93
 
@@ -150,6 +150,7 @@ EOS
150
150
  public/images
151
151
  public/images/icons
152
152
  public/images/flags
153
+ public/images/thumbnails
153
154
  public/javascripts
154
155
  public/stylesheets
155
156
  spec
@@ -160,6 +161,7 @@ EOS
160
161
  spec/test_dirs
161
162
  spec/test_dirs/empty_folder
162
163
  spec/test_dirs/indexed
164
+ spec/test_dirs/indexed/archives
163
165
  spec/test_dirs/indexed/basic
164
166
  spec/test_dirs/indexed/different_encodings
165
167
  spec/test_dirs/indexed/just_one_doc
@@ -46,13 +46,13 @@ module DocumentsHelper
46
46
 
47
47
  # Returns icon and filename for any given document.
48
48
  def icon_and_filename_for(document)
49
- [icon_for(document.ext_as_sym),document.filename].join("&nbsp;")
49
+ [icon_for(document),document.filename].join("&nbsp;")
50
50
  end
51
51
 
52
52
  # Returns the location (if available) of the filetype icon.
53
- def icon_for(ext)
54
- icon_symbol=Picolena::FiletypeToIconSymbol[ext]
55
- image_tag("icons/#{icon_symbol}.png") if icon_symbol
53
+ def icon_for(document)
54
+ path=document.icon_path
55
+ image_tag(document.icon_path) if path
56
56
  end
57
57
 
58
58
  # Returns a link to a backup search engine that could maybe find more results for the same query.
@@ -87,10 +87,11 @@ class Document
87
87
 
88
88
  # Returns cached content with matching terms between '<<' '>>'.
89
89
  def highlighted_cache(raw_query)
90
- Indexer.index.highlight(Query.extract_from(raw_query), doc_id,
90
+ excerpts=Indexer.index.highlight(Query.extract_from(raw_query), doc_id,
91
91
  :field => :content, :excerpt_length => :all,
92
92
  :pre_tag => "<<", :post_tag => ">>"
93
- ).first
93
+ )
94
+ excerpts.is_an?(Array) ? excerpts.first : ""
94
95
  end
95
96
 
96
97
  # Returns the last modification date before the document got indexed.
@@ -127,18 +128,40 @@ class Document
127
128
 
128
129
  # Indexing fields that are shared between every document.
129
130
  def self.default_fields_for(complete_path)
131
+ doc=Document.new(complete_path)
130
132
  {
131
133
  :complete_path => complete_path,
132
134
  :probably_unique_id => complete_path.base26_hash,
135
+ :alias_path => doc.alias_path,
133
136
  :filename => File.basename(complete_path),
134
137
  :basename => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
135
138
  :filetype => File.extname(complete_path),
136
139
  :modified => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
137
140
  }
138
141
  end
139
-
140
-
142
+
143
+ # Returns thumbnail if available, mime icon otherwise
144
+ def icon_path
145
+ if File.exists?(thumbnail_path) then
146
+ thumbnail_path(:public_dir)
147
+ else
148
+ icon_symbol=Picolena::FiletypeToIconSymbol[ext_as_sym]
149
+ "icons/#{icon_symbol}.png" if icon_symbol
150
+ end
151
+ end
152
+
153
+ # Did at least one letter get extracted from the document?
154
+ # This boolean is used in views to know if a link should be
155
+ # displayed to show the content
156
+ def has_content?
157
+ cached =~ /\w/
158
+ end
159
+
141
160
  private
161
+
162
+ def thumbnail_path(public_dir=false)
163
+ File.thumbnail_path(complete_path,public_dir)
164
+ end
142
165
 
143
166
  # FIXME: Is there a way to easily retrieve doc_id for a given document?
144
167
  # Better yet, fix Index#highlight to accept :probably_unique_id and stop using :doc_id.
@@ -67,7 +67,8 @@ class Indexer
67
67
  def add_or_update_file(complete_path)
68
68
  document = Document.default_fields_for(complete_path)
69
69
  begin
70
- document.merge! PlainTextExtractor.extract_content_and_language_from(complete_path)
70
+ PlainTextExtractor.extract_thumbnail_from(complete_path)
71
+ document.merge! PlainTextExtractor.extract_information_from(complete_path)
71
72
  raise "empty document #{complete_path}" if document[:content].strip.empty?
72
73
  logger.add_document document
73
74
  rescue => e
@@ -177,6 +178,8 @@ class Indexer
177
178
  end
178
179
 
179
180
  # Copied from Ferret book, By David Balmain
181
+ # FIXME : Find an alternative that doesn't need any more dependency.
182
+ # NOTE: Not supported on windows.
180
183
  def index_time_dbm_file
181
184
  @@dbm_file ||= DBM.open(File.join(Picolena::MetaIndexPath, 'added_at'))
182
185
  end
@@ -201,13 +204,14 @@ class Indexer
201
204
 
202
205
  def default_field_infos
203
206
  returning Ferret::Index::FieldInfos.new do |field_infos|
207
+ field_infos.add_field(:probably_unique_id, :store => :no, :index => :untokenized)
204
208
  field_infos.add_field(:complete_path, :store => :yes, :index => :untokenized)
205
209
  field_infos.add_field(:content, :store => :yes, :index => :yes)
210
+ field_infos.add_field(:alias_path, :store => :no, :index => :yes, :boost => 0.5)
206
211
  field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
207
212
  field_infos.add_field(:filename, :store => :no, :index => :yes, :boost => 1.5)
208
213
  field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
209
214
  field_infos.add_field(:modified, :store => :yes, :index => :untokenized)
210
- field_infos.add_field(:probably_unique_id, :store => :no, :index => :untokenized)
211
215
  field_infos.add_field(:language, :store => :yes, :index => :untokenized)
212
216
  end
213
217
  end
@@ -1,4 +1,4 @@
1
- require 'plain_text_extractor_DSL'
1
+ require 'plain_text_extractor_dsl'
2
2
 
3
3
  # PlainTextExtractor is the class responsible for extracting plain text contents from
4
4
  # different documents filetypes (.doc, .html, .pdf, .od?), as defined in
@@ -47,27 +47,29 @@ class PlainTextExtractor
47
47
  end
48
48
 
49
49
  # Launches extractor on given file and outputs plain text result and language (if found)
50
- def extract_content_and_language_from(source)
51
- find_by_filename(source).extract_content_and_language
50
+ def extract_information_from(source)
51
+ find_by_filename(source).extract_information
52
+ end
53
+
54
+ # Tries to extract a thumbnail from source.
55
+ # Doesn't do anything if thumbnail_command isn't defined for the corresponding filetype.
56
+ def extract_thumbnail_from(source)
57
+ find_by_filename(source).extract_thumbnail
52
58
  end
53
59
 
54
60
  # Returns which language guesser should be used by the system.
55
61
  # Returns nil if none is found.
56
62
  def language_guesser
57
- @@language_guesser||=('mguesser -n1' unless IO.popen("which mguesser"){|i| i.read}.empty?)
63
+ @@language_guesser||=('mguesser -n1' if 'mguesser'.installed?)
58
64
  end
59
65
  end
60
66
 
61
67
  attr_accessor :source
62
68
 
63
- # Parses command in order to know which programs are needed.
69
+ # Parses commands in order to know which programs are needed.
64
70
  # rspec will then check that every dependency is installed on the system
65
71
  def dependencies
66
- if command.is_a?(String) then
67
- command.split(/\|\s*/).collect{|command_part| command_part.split(/ /).first}
68
- else
69
- @dependencies
70
- end
72
+ [@dependencies, command.dependencies, thumbnail_command.dependencies].flatten
71
73
  end
72
74
 
73
75
  ## Conversion part
@@ -79,11 +81,11 @@ class PlainTextExtractor
79
81
  # If command includes 'DESTINATION' keyword,
80
82
  # launches the command and returns the content of
81
83
  # DESTINATION file.
82
- IO.popen(specific_command){}
84
+ silently_execute(specific_command)
83
85
  File.read_and_remove(destination)
84
86
  else
85
87
  # Otherwise, launches the command and returns STDOUT.
86
- Open3.popen3(specific_command){|stdin,stdout,stderr| stdout.read}
88
+ silently_execute(specific_command)
87
89
  end
88
90
  else
89
91
  # command is a Block.
@@ -97,14 +99,16 @@ class PlainTextExtractor
97
99
  # using mguesser to guess used language.
98
100
  # This method only returns probable language if the content is bigger than 500 chars
99
101
  # and if probability score is higher than 90%.
100
- def extract_content_and_language
102
+ def extract_information
101
103
  content=extract_content
104
+
102
105
  return {:content => content} unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
103
106
  Picolena::UseLanguageRecognition,
104
107
  # Is a language guesser already installed?
105
108
  PlainTextExtractor.language_guesser,
106
109
  # Language recognition is too unreliable for small files.
107
110
  content.size > 500].all?
111
+
108
112
  language=IO.popen(PlainTextExtractor.language_guesser,'w+'){|lang_guesser|
109
113
  lang_guesser.write content
110
114
  lang_guesser.close_write
@@ -115,9 +119,14 @@ class PlainTextExtractor
115
119
  lang unless score<0.9
116
120
  end
117
121
  }
122
+
118
123
  {:content => content, :language => language}
119
124
  end
120
125
 
126
+ def extract_thumbnail
127
+ silently_execute(specific_thumbnail_command) if thumbnail_command
128
+ end
129
+
121
130
  private
122
131
 
123
132
  # destination method can be used by some conversion command that cannot output to stdout (example?)
@@ -131,4 +140,9 @@ class PlainTextExtractor
131
140
  def specific_command
132
141
  command.sub('SOURCE','"'<<source<<'"').sub('DESTINATION','"'<<destination<<'"')
133
142
  end
143
+
144
+ # Replaces generic command with specific source and thumbnail (if specified) files
145
+ def specific_thumbnail_command
146
+ thumbnail_command.sub('SOURCE','"'<<source<<'"').sub('THUMBNAIL','"'<<File.thumbnail_path(source)<<'"')
147
+ end
134
148
  end
@@ -32,7 +32,7 @@ class Query
32
32
 
33
33
  # Instantiates a QueryParser once, and keeps it in cache.
34
34
  def parser
35
- @@parser ||= Ferret::QueryParser.new(:fields => [:content, :filename, :basename, :filetype, :modified], :or_default => false, :analyzer=>Picolena::Analyzer)
35
+ @@parser ||= Ferret::QueryParser.new(:fields => [:content, :filename, :basename, :alias_path, :filetype, :modified], :or_default => false, :analyzer=>Picolena::Analyzer)
36
36
  end
37
37
  end
38
- end
38
+ end
@@ -6,7 +6,7 @@
6
6
  %p
7
7
  =link_to_containing_directory(document)
8
8
  %br/
9
- -if document.supported?
9
+ -if document.supported? && document.has_content?
10
10
  =link_to_plain_text_content(document)
11
11
  &#45;
12
12
  =link_to_cached_content(document,query)
@@ -19,3 +19,5 @@ config.action_mailer.raise_delivery_errors = false
19
19
  module Picolena
20
20
  LOGLEVEL = Logger::DEBUG
21
21
  end
22
+
23
+ require 'development_helpers'
@@ -0,0 +1,17 @@
1
+ require 'ferret'
2
+ module Ferret
3
+ module Analysis
4
+ # Used for alias_path queries
5
+ class LetterAnalyzerWithStopFilter
6
+ def initialize(stop_words = FULL_ENGLISH_STOP_WORDS, lower = true)
7
+ @lower = lower
8
+ @stop_words = stop_words
9
+ end
10
+
11
+ def token_stream(field, str)
12
+ ts = LetterTokenizer.new(str, @lower)
13
+ StopFilter.new(ts, @stop_words)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,2 @@
1
1
  custom_config_file = File.join(RAILS_ROOT, 'config/custom/picolena.rb')
2
- require 'ferret'
3
- require custom_config_file
2
+ require custom_config_file
@@ -1,5 +1,5 @@
1
1
  require 'core_exts'
2
- require 'plain_text_extractor_DSL'
2
+ require 'plain_text_extractor_dsl'
3
3
  Picolena::Extractors=[]
4
4
  Dir.glob(File.join(RAILS_ROOT,'lib/plain_text_extractors/*.rb')).each{|extractor|
5
5
  require extractor