picolena 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. data/History.txt +14 -0
  2. data/Manifest.txt +28 -8
  3. data/config/files_to_clean +1 -0
  4. data/config/requirements.rb +1 -1
  5. data/lib/picolena/config/basic.rb +2 -1
  6. data/lib/picolena/config/icons_and_filetypes.yml +5 -0
  7. data/lib/picolena/picolena_generator.rb +3 -1
  8. data/lib/picolena/templates/app/helpers/documents_helper.rb +4 -4
  9. data/lib/picolena/templates/app/models/document.rb +27 -4
  10. data/lib/picolena/templates/app/models/indexer.rb +6 -2
  11. data/lib/picolena/templates/app/models/plain_text_extractor.rb +27 -13
  12. data/lib/picolena/templates/app/models/query.rb +2 -2
  13. data/lib/picolena/templates/app/views/documents/_document.html.haml +1 -1
  14. data/lib/picolena/templates/config/environments/development.rb +2 -0
  15. data/lib/picolena/templates/config/initializers/001_load_ferret.rb +17 -0
  16. data/lib/picolena/templates/config/initializers/{001_load_custom_config.rb → 002_load_custom_config.rb} +1 -2
  17. data/lib/picolena/templates/config/initializers/{002_load_indexed_dirs.rb → 003_load_indexed_dirs.rb} +0 -0
  18. data/lib/picolena/templates/config/initializers/{003_load_white_list_IPs.rb → 004_load_white_list_IPs.rb} +0 -0
  19. data/lib/picolena/templates/config/initializers/{004_load_plain_text_extractors.rb → 005_load_plain_text_extractors.rb} +1 -1
  20. data/lib/picolena/templates/config/initializers/{005_load_custom_title_and_names_and_links.rb → 006_load_custom_title_and_names_and_links.rb} +0 -0
  21. data/lib/picolena/templates/config/initializers/{006_load_icons.rb → 007_load_icons.rb} +0 -0
  22. data/lib/picolena/templates/config/initializers/{007_load_performance_tweaks.rb → 008_load_performance_tweaks.rb} +0 -0
  23. data/lib/picolena/templates/lib/core_exts.rb +52 -0
  24. data/lib/picolena/templates/lib/development_helpers.rb +35 -0
  25. data/lib/picolena/templates/lib/plain_text_extractor_dsl.rb +128 -0
  26. data/lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb +2 -2
  27. data/lib/picolena/templates/lib/plain_text_extractors/adobe.photoshop.rb +12 -0
  28. data/lib/picolena/templates/lib/plain_text_extractors/html.rb +1 -1
  29. data/lib/picolena/templates/lib/plain_text_extractors/ms.excel.rb +4 -4
  30. data/lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb +4 -4
  31. data/lib/picolena/templates/lib/plain_text_extractors/ms.rtf.rb +3 -3
  32. data/lib/picolena/templates/lib/plain_text_extractors/ms.word.rb +4 -4
  33. data/lib/picolena/templates/lib/plain_text_extractors/opendocument.presentation.rb +2 -2
  34. data/lib/picolena/templates/lib/plain_text_extractors/opendocument.spreadsheet.rb +2 -2
  35. data/lib/picolena/templates/lib/plain_text_extractors/opendocument.text.rb +2 -2
  36. data/lib/picolena/templates/lib/plain_text_extractors/pictures.rb +15 -4
  37. data/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb +9 -2
  38. data/lib/picolena/templates/lib/plain_text_extractors/rar.rb +18 -0
  39. data/lib/picolena/templates/lib/plain_text_extractors/videos.rb +13 -0
  40. data/lib/picolena/templates/lib/plain_text_extractors/zip.rb +17 -0
  41. data/lib/picolena/templates/lib/tasks/extract.rake +16 -0
  42. data/lib/picolena/templates/lib/tasks/install_dependencies.rake +1 -1
  43. data/lib/picolena/templates/public/images/thumbnails/NOTE +2 -0
  44. data/lib/picolena/templates/spec/controllers/documents_controller_spec.rb +8 -0
  45. data/lib/picolena/templates/spec/helpers/documents_helper_spec.rb +12 -1
  46. data/lib/picolena/templates/spec/models/basic_finder_spec.rb +6 -4
  47. data/lib/picolena/templates/spec/models/document_spec.rb +24 -4
  48. data/lib/picolena/templates/spec/models/finder_spec.rb +18 -11
  49. data/lib/picolena/templates/spec/models/host_indexing_system_spec.rb +1 -1
  50. data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb +25 -8
  51. data/lib/picolena/templates/spec/models/query_spec.rb +4 -5
  52. data/lib/picolena/templates/spec/spec_helper.rb +9 -0
  53. data/lib/picolena/templates/spec/test_dirs/indexed/archives/dumb_file.rar +0 -0
  54. data/lib/picolena/templates/spec/test_dirs/indexed/archives/some_test_files.zip +0 -0
  55. data/lib/picolena/templates/spec/test_dirs/indexed/basic/fake_thumbnailer +14 -0
  56. data/lib/picolena/templates/spec/test_dirs/indexed/media/badminton.avi +0 -0
  57. data/lib/picolena/templates/spec/test_dirs/indexed/media/caution.tif +0 -0
  58. data/lib/picolena/templates/spec/test_dirs/indexed/media/cygnus.jpeg +0 -0
  59. data/lib/picolena/templates/spec/test_dirs/indexed/media/diceface.eps +79 -0
  60. data/lib/picolena/templates/spec/test_dirs/indexed/media/glass.png +0 -0
  61. data/lib/picolena/templates/spec/test_dirs/indexed/media/gnu.bmp +0 -0
  62. data/lib/picolena/templates/spec/test_dirs/indexed/media/picolena.psd +0 -0
  63. data/lib/picolena/templates/spec/test_dirs/indexed/media/rails_logo_remix.gif +0 -0
  64. data/lib/picolena/templates/spec/test_dirs/indexed/media/warning.tiff +0 -0
  65. data/lib/picolena/version.rb +1 -1
  66. data/website/index.html +1 -1
  67. metadata +31 -32
  68. data.tar.gz.sig +0 -0
  69. data/lib/picolena/templates/lib/plain_text_extractor_DSL.rb +0 -88
  70. metadata.gz.sig +0 -0
@@ -1,3 +1,17 @@
1
+ == 0.2.2 2009-02-13
2
+
3
+ * 3 major enhancements :
4
+ * Thumbnails created for pictures & videos
5
+ * Support for .zip & .rar archives
6
+ * Alias_path is now LetterTokenized
7
+
8
+ * 2 minor enhancements:
9
+ * More specs
10
+ * Some aesthetical changes
11
+
12
+ * 1 bug fix :
13
+ * Displaying filetypes without any icon would raise an Exception
14
+
1
15
  == 0.2.0 2009-02-02
2
16
 
3
17
  * 1 major enhancement :
@@ -37,22 +37,25 @@ lib/picolena/templates/config/boot.rb
37
37
  lib/picolena/templates/config/environments/development.rb
38
38
  lib/picolena/templates/config/environments/production.rb
39
39
  lib/picolena/templates/config/environments/test.rb
40
- lib/picolena/templates/config/initializers/001_load_custom_config.rb
41
- lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb
42
- lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
43
- lib/picolena/templates/config/initializers/004_load_plain_text_extractors.rb
44
- lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
45
- lib/picolena/templates/config/initializers/006_load_icons.rb
46
- lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb
40
+ lib/picolena/templates/config/initializers/001_load_ferret.rb
41
+ lib/picolena/templates/config/initializers/002_load_custom_config.rb
42
+ lib/picolena/templates/config/initializers/003_load_indexed_dirs.rb
43
+ lib/picolena/templates/config/initializers/004_load_white_list_IPs.rb
44
+ lib/picolena/templates/config/initializers/005_load_plain_text_extractors.rb
45
+ lib/picolena/templates/config/initializers/006_load_custom_title_and_names_and_links.rb
46
+ lib/picolena/templates/config/initializers/007_load_icons.rb
47
+ lib/picolena/templates/config/initializers/008_load_performance_tweaks.rb
47
48
  lib/picolena/templates/config/routes.rb
48
49
  lib/picolena/templates/lang/ui/de.yml
49
50
  lib/picolena/templates/lang/ui/en.yml
50
51
  lib/picolena/templates/lang/ui/es.yml
51
52
  lib/picolena/templates/lang/ui/fr.yml
52
53
  lib/picolena/templates/lib/core_exts.rb
54
+ lib/picolena/templates/lib/development_helpers.rb
53
55
  lib/picolena/templates/lib/indexer_logger.rb
54
- lib/picolena/templates/lib/plain_text_extractor_DSL.rb
56
+ lib/picolena/templates/lib/plain_text_extractor_dsl.rb
55
57
  lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb
58
+ lib/picolena/templates/lib/plain_text_extractors/adobe.photoshop.rb
56
59
  lib/picolena/templates/lib/plain_text_extractors/html.rb
57
60
  lib/picolena/templates/lib/plain_text_extractors/ms.excel.rb
58
61
  lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb
@@ -63,7 +66,11 @@ lib/picolena/templates/lib/plain_text_extractors/opendocument.spreadsheet.rb
63
66
  lib/picolena/templates/lib/plain_text_extractors/opendocument.text.rb
64
67
  lib/picolena/templates/lib/plain_text_extractors/pictures.rb
65
68
  lib/picolena/templates/lib/plain_text_extractors/plain_text.rb
69
+ lib/picolena/templates/lib/plain_text_extractors/rar.rb
70
+ lib/picolena/templates/lib/plain_text_extractors/videos.rb
71
+ lib/picolena/templates/lib/plain_text_extractors/zip.rb
66
72
  lib/picolena/templates/lib/tasks/annotations.rake
73
+ lib/picolena/templates/lib/tasks/extract.rake
67
74
  lib/picolena/templates/lib/tasks/index.rake
68
75
  lib/picolena/templates/lib/tasks/install_dependencies.rake
69
76
  lib/picolena/templates/lib/tasks/log.rake
@@ -112,6 +119,7 @@ lib/picolena/templates/public/images/icons/txt.png
112
119
  lib/picolena/templates/public/images/icons/video.png
113
120
  lib/picolena/templates/public/images/icons/xls.png
114
121
  lib/picolena/templates/public/images/main_img.jpg
122
+ lib/picolena/templates/public/images/thumbnails/NOTE
115
123
  lib/picolena/templates/public/images/zafh_net.png
116
124
  lib/picolena/templates/public/robots.txt
117
125
  lib/picolena/templates/public/stylesheets/style.css
@@ -147,12 +155,15 @@ lib/picolena/templates/spec/rcov.opts
147
155
  lib/picolena/templates/spec/spec.opts
148
156
  lib/picolena/templates/spec/spec_helper.rb
149
157
  lib/picolena/templates/spec/test_dirs/indexed/README
158
+ lib/picolena/templates/spec/test_dirs/indexed/archives/dumb_file.rar
159
+ lib/picolena/templates/spec/test_dirs/indexed/archives/some_test_files.zip
150
160
  lib/picolena/templates/spec/test_dirs/indexed/basic/another_plain.text
151
161
  lib/picolena/templates/spec/test_dirs/indexed/basic/basic.odt
152
162
  lib/picolena/templates/spec/test_dirs/indexed/basic/basic.pdf
153
163
  lib/picolena/templates/spec/test_dirs/indexed/basic/basic.tex
154
164
  lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.text
155
165
  lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.txt
166
+ lib/picolena/templates/spec/test_dirs/indexed/basic/fake_thumbnailer
156
167
  lib/picolena/templates/spec/test_dirs/indexed/basic/hello.rb
157
168
  lib/picolena/templates/spec/test_dirs/indexed/basic/myfirstjavaprog.java
158
169
  lib/picolena/templates/spec/test_dirs/indexed/basic/one_page.ppt
@@ -173,7 +184,16 @@ lib/picolena/templates/spec/test_dirs/indexed/lang/lorca
173
184
  lib/picolena/templates/spec/test_dirs/indexed/lang/shakespeare
174
185
  lib/picolena/templates/spec/test_dirs/indexed/literature/Simulation of district heating systems for evaluation of real-time control strategies.pdf
175
186
  lib/picolena/templates/spec/test_dirs/indexed/literature/Types of malfunction in DH substations.doc
187
+ lib/picolena/templates/spec/test_dirs/indexed/media/badminton.avi
188
+ lib/picolena/templates/spec/test_dirs/indexed/media/caution.tif
176
189
  lib/picolena/templates/spec/test_dirs/indexed/media/crow.jpg
190
+ lib/picolena/templates/spec/test_dirs/indexed/media/cygnus.jpeg
191
+ lib/picolena/templates/spec/test_dirs/indexed/media/diceface.eps
192
+ lib/picolena/templates/spec/test_dirs/indexed/media/glass.png
193
+ lib/picolena/templates/spec/test_dirs/indexed/media/gnu.bmp
194
+ lib/picolena/templates/spec/test_dirs/indexed/media/picolena.psd
195
+ lib/picolena/templates/spec/test_dirs/indexed/media/rails_logo_remix.gif
196
+ lib/picolena/templates/spec/test_dirs/indexed/media/warning.tiff
177
197
  lib/picolena/templates/spec/test_dirs/indexed/others/'weird'filename.txt
178
198
  lib/picolena/templates/spec/test_dirs/indexed/others/7.html
179
199
  lib/picolena/templates/spec/test_dirs/indexed/others/BIN_FILE_WITHOUT_EXTENSION
@@ -13,3 +13,4 @@ lib/picolena/templates/tmp
13
13
  lib/picolena/templates/vendor
14
14
  lib/picolena/templates/coverage
15
15
  lib/picolena/templates/doc
16
+ lib/picolena/templates/public/images/thumbnails/*.jpg
@@ -2,7 +2,7 @@ require 'fileutils'
2
2
  include FileUtils
3
3
 
4
4
  require 'rubygems'
5
- %w[rake hoe newgem rubigen].each do |req_gem|
5
+ %w[rake newgem rubigen].each do |req_gem|
6
6
  begin
7
7
  require req_gem
8
8
  rescue LoadError
@@ -46,5 +46,6 @@ module Picolena
46
46
  # PerFieldAnalyzer is used to prevent queries like "language:it" to be broken by StopFilter.
47
47
  per_field_analyzer=Ferret::Analysis::PerFieldAnalyzer.new(Ferret::Analysis::StandardAnalyzer.new)
48
48
  per_field_analyzer[:language]=Ferret::Analysis::WhiteSpaceAnalyzer.new
49
+ per_field_analyzer[:alias_path]=Ferret::Analysis::LetterAnalyzerWithStopFilter.new
49
50
  Analyzer=per_field_analyzer
50
- end
51
+ end
@@ -3,6 +3,7 @@ video:
3
3
  wmv
4
4
  mpg
5
5
  mpeg
6
+ mov
6
7
  ogg:
7
8
  mp3
8
9
  ogg
@@ -17,6 +18,10 @@ txt:
17
18
  bib
18
19
  log
19
20
  ini
21
+ sub
22
+ srt
23
+ vcf
24
+ vcard
20
25
  no_extension
21
26
  doc:
22
27
  doc
@@ -87,7 +87,7 @@ class PicolenaGenerator < RubiGen::Base #:nodoc:
87
87
  m.rake 'spec' unless options[:no_spec]
88
88
 
89
89
  # Cleaning up temp folder if --spec-only
90
- m.clean if false
90
+ m.clean if options[:spec_only]
91
91
  end
92
92
  end
93
93
 
@@ -150,6 +150,7 @@ EOS
150
150
  public/images
151
151
  public/images/icons
152
152
  public/images/flags
153
+ public/images/thumbnails
153
154
  public/javascripts
154
155
  public/stylesheets
155
156
  spec
@@ -160,6 +161,7 @@ EOS
160
161
  spec/test_dirs
161
162
  spec/test_dirs/empty_folder
162
163
  spec/test_dirs/indexed
164
+ spec/test_dirs/indexed/archives
163
165
  spec/test_dirs/indexed/basic
164
166
  spec/test_dirs/indexed/different_encodings
165
167
  spec/test_dirs/indexed/just_one_doc
@@ -46,13 +46,13 @@ module DocumentsHelper
46
46
 
47
47
  # Returns icon and filename for any given document.
48
48
  def icon_and_filename_for(document)
49
- [icon_for(document.ext_as_sym),document.filename].join("&nbsp;")
49
+ [icon_for(document),document.filename].join("&nbsp;")
50
50
  end
51
51
 
52
52
  # Returns the location (if avaible) of the filetype icon.
53
- def icon_for(ext)
54
- icon_symbol=Picolena::FiletypeToIconSymbol[ext]
55
- image_tag("icons/#{icon_symbol}.png") if icon_symbol
53
+ def icon_for(document)
54
+ path=document.icon_path
55
+ image_tag(document.icon_path) if path
56
56
  end
57
57
 
58
58
  # Returns a link to a backup search engine that could maybe find more results for the same query.
@@ -87,10 +87,11 @@ class Document
87
87
 
88
88
  # Returns cached content with matching terms between '<<' '>>'.
89
89
  def highlighted_cache(raw_query)
90
- Indexer.index.highlight(Query.extract_from(raw_query), doc_id,
90
+ excerpts=Indexer.index.highlight(Query.extract_from(raw_query), doc_id,
91
91
  :field => :content, :excerpt_length => :all,
92
92
  :pre_tag => "<<", :post_tag => ">>"
93
- ).first
93
+ )
94
+ excerpts.is_an?(Array) ? excerpts.first : ""
94
95
  end
95
96
 
96
97
  # Returns the last modification date before the document got indexed.
@@ -127,18 +128,40 @@ class Document
127
128
 
128
129
  # Indexing fields that are shared between every document.
129
130
  def self.default_fields_for(complete_path)
131
+ doc=Document.new(complete_path)
130
132
  {
131
133
  :complete_path => complete_path,
132
134
  :probably_unique_id => complete_path.base26_hash,
135
+ :alias_path => doc.alias_path,
133
136
  :filename => File.basename(complete_path),
134
137
  :basename => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
135
138
  :filetype => File.extname(complete_path),
136
139
  :modified => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
137
140
  }
138
141
  end
139
-
140
-
142
+
143
+ # Returns thumbnail if available, mime icon otherwise
144
+ def icon_path
145
+ if File.exists?(thumbnail_path) then
146
+ thumbnail_path(:public_dir)
147
+ else
148
+ icon_symbol=Picolena::FiletypeToIconSymbol[ext_as_sym]
149
+ "icons/#{icon_symbol}.png" if icon_symbol
150
+ end
151
+ end
152
+
153
+ # Did at least one letter got extracted from the document?
154
+ # This boolean is used in views to know if a link should be
155
+ # displayed to show the content
156
+ def has_content?
157
+ cached =~ /\w/
158
+ end
159
+
141
160
  private
161
+
162
+ def thumbnail_path(public_dir=false)
163
+ File.thumbnail_path(complete_path,public_dir)
164
+ end
142
165
 
143
166
  # FIXME: Is there a way to easily retrieve doc_id for a given document?
144
167
  # Better yet, fix Index#highlight to accept :probably_unique_id and stop using :doc_id.
@@ -67,7 +67,8 @@ class Indexer
67
67
  def add_or_update_file(complete_path)
68
68
  document = Document.default_fields_for(complete_path)
69
69
  begin
70
- document.merge! PlainTextExtractor.extract_content_and_language_from(complete_path)
70
+ PlainTextExtractor.extract_thumbnail_from(complete_path)
71
+ document.merge! PlainTextExtractor.extract_information_from(complete_path)
71
72
  raise "empty document #{complete_path}" if document[:content].strip.empty?
72
73
  logger.add_document document
73
74
  rescue => e
@@ -177,6 +178,8 @@ class Indexer
177
178
  end
178
179
 
179
180
  # Copied from Ferret book, By David Balmain
181
+ # FIXME : Find an alternative that doesn't need any more dependency.
182
+ # NOTE: Not supported on windows.
180
183
  def index_time_dbm_file
181
184
  @@dbm_file ||= DBM.open(File.join(Picolena::MetaIndexPath, 'added_at'))
182
185
  end
@@ -201,13 +204,14 @@ class Indexer
201
204
 
202
205
  def default_field_infos
203
206
  returning Ferret::Index::FieldInfos.new do |field_infos|
207
+ field_infos.add_field(:probably_unique_id, :store => :no, :index => :untokenized)
204
208
  field_infos.add_field(:complete_path, :store => :yes, :index => :untokenized)
205
209
  field_infos.add_field(:content, :store => :yes, :index => :yes)
210
+ field_infos.add_field(:alias_path, :store => :no, :index => :yes, :boost => 0.5)
206
211
  field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
207
212
  field_infos.add_field(:filename, :store => :no, :index => :yes, :boost => 1.5)
208
213
  field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
209
214
  field_infos.add_field(:modified, :store => :yes, :index => :untokenized)
210
- field_infos.add_field(:probably_unique_id, :store => :no, :index => :untokenized)
211
215
  field_infos.add_field(:language, :store => :yes, :index => :untokenized)
212
216
  end
213
217
  end
@@ -1,4 +1,4 @@
1
- require 'plain_text_extractor_DSL'
1
+ require 'plain_text_extractor_dsl'
2
2
 
3
3
  # PlainTextExtractor is the class responsible for extracting plain text contents from
4
4
  # different documents filetypes (.doc, .html, .pdf, .od?), as defined in
@@ -47,27 +47,29 @@ class PlainTextExtractor
47
47
  end
48
48
 
49
49
  # Launches extractor on given file and outputs plain text result and language (if found)
50
- def extract_content_and_language_from(source)
51
- find_by_filename(source).extract_content_and_language
50
+ def extract_information_from(source)
51
+ find_by_filename(source).extract_information
52
+ end
53
+
54
+ # Tries to extract a thumbnail from source.
55
+ # Doesn't do anything if thumbnail_command isn't defined for the corresponding filetype.
56
+ def extract_thumbnail_from(source)
57
+ find_by_filename(source).extract_thumbnail
52
58
  end
53
59
 
54
60
  # Returns which language guesser should be used by the system.
55
61
  # Returns nil if none is found.
56
62
  def language_guesser
57
- @@language_guesser||=('mguesser -n1' unless IO.popen("which mguesser"){|i| i.read}.empty?)
63
+ @@language_guesser||=('mguesser -n1' if 'mguesser'.installed?)
58
64
  end
59
65
  end
60
66
 
61
67
  attr_accessor :source
62
68
 
63
- # Parses command in order to know which programs are needed.
69
+ # Parses commands in order to know which programs are needed.
64
70
  # rspec will then check that every dependecy is installed on the system
65
71
  def dependencies
66
- if command.is_a?(String) then
67
- command.split(/\|\s*/).collect{|command_part| command_part.split(/ /).first}
68
- else
69
- @dependencies
70
- end
72
+ [@dependencies, command.dependencies, thumbnail_command.dependencies].flatten
71
73
  end
72
74
 
73
75
  ## Conversion part
@@ -79,11 +81,11 @@ class PlainTextExtractor
79
81
  # If command includes 'DESTINATION' keyword,
80
82
  # launches the command and returns the content of
81
83
  # DESTINATION file.
82
- IO.popen(specific_command){}
84
+ silently_execute(specific_command)
83
85
  File.read_and_remove(destination)
84
86
  else
85
87
  # Otherwise, launches the command and returns STDOUT.
86
- Open3.popen3(specific_command){|stdin,stdout,stderr| stdout.read}
88
+ silently_execute(specific_command)
87
89
  end
88
90
  else
89
91
  # command is a Block.
@@ -97,14 +99,16 @@ class PlainTextExtractor
97
99
  # using mguesser to guess used language.
98
100
  # This method only returns probable language if the content is bigger than 500 chars
99
101
  # and if probability score is higher than 90%.
100
- def extract_content_and_language
102
+ def extract_information
101
103
  content=extract_content
104
+
102
105
  return {:content => content} unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb)
103
106
  Picolena::UseLanguageRecognition,
104
107
  # Is a language guesser already installed?
105
108
  PlainTextExtractor.language_guesser,
106
109
  # Language recognition is too unreliable for small files.
107
110
  content.size > 500].all?
111
+
108
112
  language=IO.popen(PlainTextExtractor.language_guesser,'w+'){|lang_guesser|
109
113
  lang_guesser.write content
110
114
  lang_guesser.close_write
@@ -115,9 +119,14 @@ class PlainTextExtractor
115
119
  lang unless score<0.9
116
120
  end
117
121
  }
122
+
118
123
  {:content => content, :language => language}
119
124
  end
120
125
 
126
+ def extract_thumbnail
127
+ silently_execute(specific_thumbnail_command) if thumbnail_command
128
+ end
129
+
121
130
  private
122
131
 
123
132
  # destination method can be used by some conversion command that cannot output to stdout (example?)
@@ -131,4 +140,9 @@ class PlainTextExtractor
131
140
  def specific_command
132
141
  command.sub('SOURCE','"'<<source<<'"').sub('DESTINATION','"'<<destination<<'"')
133
142
  end
143
+
144
+ # Replaces generic command with specific source and thumbnail (if specified) files
145
+ def specific_thumbnail_command
146
+ thumbnail_command.sub('SOURCE','"'<<source<<'"').sub('THUMBNAIL','"'<<File.thumbnail_path(source)<<'"')
147
+ end
134
148
  end
@@ -32,7 +32,7 @@ class Query
32
32
 
33
33
  # Instantiates a QueryParser once, and keeps it in cache.
34
34
  def parser
35
- @@parser ||= Ferret::QueryParser.new(:fields => [:content, :filename, :basename, :filetype, :modified], :or_default => false, :analyzer=>Picolena::Analyzer)
35
+ @@parser ||= Ferret::QueryParser.new(:fields => [:content, :filename, :basename, :alias_path, :filetype, :modified], :or_default => false, :analyzer=>Picolena::Analyzer)
36
36
  end
37
37
  end
38
- end
38
+ end
@@ -6,7 +6,7 @@
6
6
  %p
7
7
  =link_to_containing_directory(document)
8
8
  %br/
9
- -if document.supported?
9
+ -if document.supported? && document.has_content?
10
10
  =link_to_plain_text_content(document)
11
11
  &#45;
12
12
  =link_to_cached_content(document,query)
@@ -19,3 +19,5 @@ config.action_mailer.raise_delivery_errors = false
19
19
  module Picolena
20
20
  LOGLEVEL = Logger::DEBUG
21
21
  end
22
+
23
+ require 'development_helpers'
@@ -0,0 +1,17 @@
1
+ require 'ferret'
2
+ module Ferret
3
+ module Analysis
4
+ # Used for alias_path queries
5
+ class LetterAnalyzerWithStopFilter
6
+ def initialize(stop_words = FULL_ENGLISH_STOP_WORDS, lower = true)
7
+ @lower = lower
8
+ @stop_words = stop_words
9
+ end
10
+
11
+ def token_stream(field, str)
12
+ ts = LetterTokenizer.new(str, @lower)
13
+ StopFilter.new(ts, @stop_words)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -1,3 +1,2 @@
1
1
  custom_config_file = File.join(RAILS_ROOT, 'config/custom/picolena.rb')
2
- require 'ferret'
3
- require custom_config_file
2
+ require custom_config_file
@@ -1,5 +1,5 @@
1
1
  require 'core_exts'
2
- require 'plain_text_extractor_DSL'
2
+ require 'plain_text_extractor_dsl'
3
3
  Picolena::Extractors=[]
4
4
  Dir.glob(File.join(RAILS_ROOT,'lib/plain_text_extractors/*.rb')).each{|extractor|
5
5
  require extractor