picolena 0.2.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +14 -0
- data/Manifest.txt +28 -8
- data/config/files_to_clean +1 -0
- data/config/requirements.rb +1 -1
- data/lib/picolena/config/basic.rb +2 -1
- data/lib/picolena/config/icons_and_filetypes.yml +5 -0
- data/lib/picolena/picolena_generator.rb +3 -1
- data/lib/picolena/templates/app/helpers/documents_helper.rb +4 -4
- data/lib/picolena/templates/app/models/document.rb +27 -4
- data/lib/picolena/templates/app/models/indexer.rb +6 -2
- data/lib/picolena/templates/app/models/plain_text_extractor.rb +27 -13
- data/lib/picolena/templates/app/models/query.rb +2 -2
- data/lib/picolena/templates/app/views/documents/_document.html.haml +1 -1
- data/lib/picolena/templates/config/environments/development.rb +2 -0
- data/lib/picolena/templates/config/initializers/001_load_ferret.rb +17 -0
- data/lib/picolena/templates/config/initializers/{001_load_custom_config.rb → 002_load_custom_config.rb} +1 -2
- data/lib/picolena/templates/config/initializers/{002_load_indexed_dirs.rb → 003_load_indexed_dirs.rb} +0 -0
- data/lib/picolena/templates/config/initializers/{003_load_white_list_IPs.rb → 004_load_white_list_IPs.rb} +0 -0
- data/lib/picolena/templates/config/initializers/{004_load_plain_text_extractors.rb → 005_load_plain_text_extractors.rb} +1 -1
- data/lib/picolena/templates/config/initializers/{005_load_custom_title_and_names_and_links.rb → 006_load_custom_title_and_names_and_links.rb} +0 -0
- data/lib/picolena/templates/config/initializers/{006_load_icons.rb → 007_load_icons.rb} +0 -0
- data/lib/picolena/templates/config/initializers/{007_load_performance_tweaks.rb → 008_load_performance_tweaks.rb} +0 -0
- data/lib/picolena/templates/lib/core_exts.rb +52 -0
- data/lib/picolena/templates/lib/development_helpers.rb +35 -0
- data/lib/picolena/templates/lib/plain_text_extractor_dsl.rb +128 -0
- data/lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb +2 -2
- data/lib/picolena/templates/lib/plain_text_extractors/adobe.photoshop.rb +12 -0
- data/lib/picolena/templates/lib/plain_text_extractors/html.rb +1 -1
- data/lib/picolena/templates/lib/plain_text_extractors/ms.excel.rb +4 -4
- data/lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb +4 -4
- data/lib/picolena/templates/lib/plain_text_extractors/ms.rtf.rb +3 -3
- data/lib/picolena/templates/lib/plain_text_extractors/ms.word.rb +4 -4
- data/lib/picolena/templates/lib/plain_text_extractors/opendocument.presentation.rb +2 -2
- data/lib/picolena/templates/lib/plain_text_extractors/opendocument.spreadsheet.rb +2 -2
- data/lib/picolena/templates/lib/plain_text_extractors/opendocument.text.rb +2 -2
- data/lib/picolena/templates/lib/plain_text_extractors/pictures.rb +15 -4
- data/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb +9 -2
- data/lib/picolena/templates/lib/plain_text_extractors/rar.rb +18 -0
- data/lib/picolena/templates/lib/plain_text_extractors/videos.rb +13 -0
- data/lib/picolena/templates/lib/plain_text_extractors/zip.rb +17 -0
- data/lib/picolena/templates/lib/tasks/extract.rake +16 -0
- data/lib/picolena/templates/lib/tasks/install_dependencies.rake +1 -1
- data/lib/picolena/templates/public/images/thumbnails/NOTE +2 -0
- data/lib/picolena/templates/spec/controllers/documents_controller_spec.rb +8 -0
- data/lib/picolena/templates/spec/helpers/documents_helper_spec.rb +12 -1
- data/lib/picolena/templates/spec/models/basic_finder_spec.rb +6 -4
- data/lib/picolena/templates/spec/models/document_spec.rb +24 -4
- data/lib/picolena/templates/spec/models/finder_spec.rb +18 -11
- data/lib/picolena/templates/spec/models/host_indexing_system_spec.rb +1 -1
- data/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb +25 -8
- data/lib/picolena/templates/spec/models/query_spec.rb +4 -5
- data/lib/picolena/templates/spec/spec_helper.rb +9 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/archives/dumb_file.rar +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/archives/some_test_files.zip +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/basic/fake_thumbnailer +14 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/badminton.avi +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/caution.tif +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/cygnus.jpeg +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/diceface.eps +79 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/glass.png +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/gnu.bmp +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/picolena.psd +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/rails_logo_remix.gif +0 -0
- data/lib/picolena/templates/spec/test_dirs/indexed/media/warning.tiff +0 -0
- data/lib/picolena/version.rb +1 -1
- data/website/index.html +1 -1
- metadata +31 -32
- data.tar.gz.sig +0 -0
- data/lib/picolena/templates/lib/plain_text_extractor_DSL.rb +0 -88
- metadata.gz.sig +0 -0
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>Picolena</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/picolena" class="numbers">0.2.0</a>
|
36
|
+
<a href="http://rubyforge.org/projects/picolena" class="numbers">0.2.2</a>
|
37
37
|
</div>
|
38
38
|
<h2>Introduction</h2>
|
39
39
|
<p>’’Picolena’’ is a lightweight <a href="http://ferret.davebalmain.com/trac">ferret-powered</a> documents search engine written in <a href="http://www.rubyonrails.org/">Ruby on rails</a> :</p>
|
metadata
CHANGED
@@ -1,36 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picolena
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Duminil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
|
-
cert_chain:
|
11
|
-
- |
|
12
|
-
-----BEGIN CERTIFICATE-----
|
13
|
-
MIIDQjCCAiqgAwIBAgIBADANBgkqhkiG9w0BAQUFADBHMRUwEwYDVQQDDAxlcmlj
|
14
|
-
X2R1bWluaWwxGTAXBgoJkiaJk/IsZAEZFglydWJ5Zm9yZ2UxEzARBgoJkiaJk/Is
|
15
|
-
ZAEZFgNvcmcwHhcNMDgwNDA4MTAyMjU3WhcNMDkwNDA4MTAyMjU3WjBHMRUwEwYD
|
16
|
-
VQQDDAxlcmljX2R1bWluaWwxGTAXBgoJkiaJk/IsZAEZFglydWJ5Zm9yZ2UxEzAR
|
17
|
-
BgoJkiaJk/IsZAEZFgNvcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIB
|
18
|
-
AQC3t8Gt6wskwPkE8KKhTk3J6jwZniMoNCowrJhZV/syWOjWyLIcSjKBC4Q7NeEl
|
19
|
-
oCW0G/JZpP+om0RmTh36VeNeYBdWdbueHtwEdwHdB+0sBg0EmP72E0VJLdbf+RsQ
|
20
|
-
eADjL3rDIbjoyjeFG6wWbJlRdLyDoOVI+PmYyevYm0KmQLKS5WyZBbTaQgWpJSNB
|
21
|
-
FnYw37koqEkG6DRmOU8A2RWhD0+x+TxYRfX8ifNyBGj6SqRN729Qy2+J054i0AZ/
|
22
|
-
5fXCxe42uTleLwpGAmJqSbtJVX9hFWZNvo+heTMcau2kn+4s/6wxlmvIF8dlIKub
|
23
|
-
Y6LhOavjP1RwrI0uB7j4BVFfAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQD
|
24
|
-
AgSwMB0GA1UdDgQWBBTJxrSTvfQOyq89mHhqeUS1w8K1fzANBgkqhkiG9w0BAQUF
|
25
|
-
AAOCAQEASHKqT/2t3LqEFNlDbJxbILs6ILdRyE+r1q4L1kSrESmdQiZWWUvxV3FJ
|
26
|
-
bEUVhwgvJAGnH9sVm53NZ6783JPpEudi2+PL3ESUTpfxoH7T+q0uEzXe0yfPCSdz
|
27
|
-
FgMNTKGI3keMOHeMKOKJ/I48QkRVK0sGRqxuJlamQt/yP/OeAK6BNAaJkpDcn4zm
|
28
|
-
LlsXxhO4VWMx1Z19OtWR8VleDsWev/qFOUpWrcR2XLPbQpL09cFRvW7BD2gBWxCp
|
29
|
-
Z94n6iXMjIKhAtKZ/kY9K676h5dWUeSiyulqxYojkc/mx/wNJQJraIz2rFs086rn
|
30
|
-
qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
|
31
|
-
-----END CERTIFICATE-----
|
10
|
+
cert_chain: []
|
32
11
|
|
33
|
-
date: 2009-02-
|
12
|
+
date: 2009-02-13 00:00:00 +01:00
|
34
13
|
default_executable:
|
35
14
|
dependencies:
|
36
15
|
- !ruby/object:Gem::Dependency
|
@@ -191,22 +170,25 @@ files:
|
|
191
170
|
- lib/picolena/templates/config/environments/development.rb
|
192
171
|
- lib/picolena/templates/config/environments/production.rb
|
193
172
|
- lib/picolena/templates/config/environments/test.rb
|
194
|
-
- lib/picolena/templates/config/initializers/001_load_custom_config.rb
|
195
|
-
- lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb
|
196
|
-
- lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
|
197
|
-
- lib/picolena/templates/config/initializers/004_load_plain_text_extractors.rb
|
198
|
-
- lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
|
199
|
-
- lib/picolena/templates/config/initializers/006_load_icons.rb
|
200
|
-
- lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb
|
173
|
+
- lib/picolena/templates/config/initializers/001_load_ferret.rb
|
174
|
+
- lib/picolena/templates/config/initializers/002_load_custom_config.rb
|
175
|
+
- lib/picolena/templates/config/initializers/003_load_indexed_dirs.rb
|
176
|
+
- lib/picolena/templates/config/initializers/004_load_white_list_IPs.rb
|
177
|
+
- lib/picolena/templates/config/initializers/005_load_plain_text_extractors.rb
|
178
|
+
- lib/picolena/templates/config/initializers/006_load_custom_title_and_names_and_links.rb
|
179
|
+
- lib/picolena/templates/config/initializers/007_load_icons.rb
|
180
|
+
- lib/picolena/templates/config/initializers/008_load_performance_tweaks.rb
|
201
181
|
- lib/picolena/templates/config/routes.rb
|
202
182
|
- lib/picolena/templates/lang/ui/de.yml
|
203
183
|
- lib/picolena/templates/lang/ui/en.yml
|
204
184
|
- lib/picolena/templates/lang/ui/es.yml
|
205
185
|
- lib/picolena/templates/lang/ui/fr.yml
|
206
186
|
- lib/picolena/templates/lib/core_exts.rb
|
187
|
+
- lib/picolena/templates/lib/development_helpers.rb
|
207
188
|
- lib/picolena/templates/lib/indexer_logger.rb
|
208
|
-
- lib/picolena/templates/lib/plain_text_extractor_DSL.rb
|
189
|
+
- lib/picolena/templates/lib/plain_text_extractor_dsl.rb
|
209
190
|
- lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb
|
191
|
+
- lib/picolena/templates/lib/plain_text_extractors/adobe.photoshop.rb
|
210
192
|
- lib/picolena/templates/lib/plain_text_extractors/html.rb
|
211
193
|
- lib/picolena/templates/lib/plain_text_extractors/ms.excel.rb
|
212
194
|
- lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb
|
@@ -217,7 +199,11 @@ files:
|
|
217
199
|
- lib/picolena/templates/lib/plain_text_extractors/opendocument.text.rb
|
218
200
|
- lib/picolena/templates/lib/plain_text_extractors/pictures.rb
|
219
201
|
- lib/picolena/templates/lib/plain_text_extractors/plain_text.rb
|
202
|
+
- lib/picolena/templates/lib/plain_text_extractors/rar.rb
|
203
|
+
- lib/picolena/templates/lib/plain_text_extractors/videos.rb
|
204
|
+
- lib/picolena/templates/lib/plain_text_extractors/zip.rb
|
220
205
|
- lib/picolena/templates/lib/tasks/annotations.rake
|
206
|
+
- lib/picolena/templates/lib/tasks/extract.rake
|
221
207
|
- lib/picolena/templates/lib/tasks/index.rake
|
222
208
|
- lib/picolena/templates/lib/tasks/install_dependencies.rake
|
223
209
|
- lib/picolena/templates/lib/tasks/log.rake
|
@@ -266,6 +252,7 @@ files:
|
|
266
252
|
- lib/picolena/templates/public/images/icons/video.png
|
267
253
|
- lib/picolena/templates/public/images/icons/xls.png
|
268
254
|
- lib/picolena/templates/public/images/main_img.jpg
|
255
|
+
- lib/picolena/templates/public/images/thumbnails/NOTE
|
269
256
|
- lib/picolena/templates/public/images/zafh_net.png
|
270
257
|
- lib/picolena/templates/public/robots.txt
|
271
258
|
- lib/picolena/templates/public/stylesheets/style.css
|
@@ -301,12 +288,15 @@ files:
|
|
301
288
|
- lib/picolena/templates/spec/spec.opts
|
302
289
|
- lib/picolena/templates/spec/spec_helper.rb
|
303
290
|
- lib/picolena/templates/spec/test_dirs/indexed/README
|
291
|
+
- lib/picolena/templates/spec/test_dirs/indexed/archives/dumb_file.rar
|
292
|
+
- lib/picolena/templates/spec/test_dirs/indexed/archives/some_test_files.zip
|
304
293
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/another_plain.text
|
305
294
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/basic.odt
|
306
295
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/basic.pdf
|
307
296
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/basic.tex
|
308
297
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.text
|
309
298
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/crossed.txt
|
299
|
+
- lib/picolena/templates/spec/test_dirs/indexed/basic/fake_thumbnailer
|
310
300
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/hello.rb
|
311
301
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/myfirstjavaprog.java
|
312
302
|
- lib/picolena/templates/spec/test_dirs/indexed/basic/one_page.ppt
|
@@ -327,7 +317,16 @@ files:
|
|
327
317
|
- lib/picolena/templates/spec/test_dirs/indexed/lang/shakespeare
|
328
318
|
- lib/picolena/templates/spec/test_dirs/indexed/literature/Simulation of district heating systems for evaluation of real-time control strategies.pdf
|
329
319
|
- lib/picolena/templates/spec/test_dirs/indexed/literature/Types of malfunction in DH substations.doc
|
320
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/badminton.avi
|
321
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/caution.tif
|
330
322
|
- lib/picolena/templates/spec/test_dirs/indexed/media/crow.jpg
|
323
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/cygnus.jpeg
|
324
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/diceface.eps
|
325
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/glass.png
|
326
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/gnu.bmp
|
327
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/picolena.psd
|
328
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/rails_logo_remix.gif
|
329
|
+
- lib/picolena/templates/spec/test_dirs/indexed/media/warning.tiff
|
331
330
|
- lib/picolena/templates/spec/test_dirs/indexed/others/'weird'filename.txt
|
332
331
|
- lib/picolena/templates/spec/test_dirs/indexed/others/7.html
|
333
332
|
- lib/picolena/templates/spec/test_dirs/indexed/others/BIN_FILE_WITHOUT_EXTENSION
|
data.tar.gz.sig
DELETED
Binary file
|
@@ -1,88 +0,0 @@
|
|
1
|
-
# Defines plain text extractors with DSL
|
2
|
-
# For example, to convert "Microsoft Office Word document" to plain text
|
3
|
-
# PlainTextExtractor.new {
|
4
|
-
# every :doc, :dot
|
5
|
-
# as "application/msword"
|
6
|
-
# aka "Microsoft Office Word document"
|
7
|
-
# with "antiword SOURCE" => :on_linux, "some other command" => :on_windows
|
8
|
-
# which_should_for_example_extract 'district heating', :from => 'Types of malfunction in DH substations.doc'
|
9
|
-
# or_extract 'Basic Word template for Picolena specs', :from => 'office2003-word-template.dot'
|
10
|
-
# }
|
11
|
-
|
12
|
-
require 'open3'
|
13
|
-
module PlainTextExtractorDSL
|
14
|
-
attr_reader :exts, :mime_name, :description, :command, :content_and_file_examples
|
15
|
-
|
16
|
-
def initialize(&block)
|
17
|
-
@content_and_file_examples=[]
|
18
|
-
self.instance_eval(&block)
|
19
|
-
PlainTextExtractor.add(self)
|
20
|
-
end
|
21
|
-
|
22
|
-
def every(*exts)
|
23
|
-
@exts=exts
|
24
|
-
end
|
25
|
-
|
26
|
-
def as(mime_name)
|
27
|
-
@mime_name=mime_name
|
28
|
-
end
|
29
|
-
|
30
|
-
def aka(description)
|
31
|
-
@description=description
|
32
|
-
end
|
33
|
-
|
34
|
-
def which_requires(*dependencies)
|
35
|
-
@dependencies=dependencies
|
36
|
-
end
|
37
|
-
|
38
|
-
#used by rspec to test extractors:
|
39
|
-
# which_should_for_example_extract 'in a pdf file', :from => 'basic.pdf'
|
40
|
-
# or_extract 'some other stuff inside another pdf file', :from => 'yet_another.pdf'
|
41
|
-
#
|
42
|
-
#this spec will pass if 'basic.pdf' and 'yet_another.pdf' are included in an indexed directory, if every dependency is installed,
|
43
|
-
#and if plain text output from the extractor applied to 'basic.pdf' and 'yet_another.pdf' respectively include 'in a pdf file' and 'some other stuff inside another pdf file'
|
44
|
-
def which_should_for_example_extract(content, file)
|
45
|
-
@content_and_file_examples << [content,file[:from]]
|
46
|
-
end
|
47
|
-
|
48
|
-
#it allows to define specs in this way:
|
49
|
-
# which_should_for_example_extract 'Hello world!', :from => 'hello.rb'
|
50
|
-
# or_extract 'text inside!', :from => 'crossed.txt'
|
51
|
-
alias_method :or_extract, :which_should_for_example_extract
|
52
|
-
|
53
|
-
def with(command_as_hash_or_string=nil,&block)
|
54
|
-
#TODO: Find a better way to manage platforms, and include OS X, Vista, BSD...
|
55
|
-
platform=case RUBY_PLATFORM
|
56
|
-
when /linux/
|
57
|
-
:linux
|
58
|
-
when /win/
|
59
|
-
:windows
|
60
|
-
when /darwin/
|
61
|
-
:mac_os
|
62
|
-
end
|
63
|
-
@command=case command_as_hash_or_string
|
64
|
-
when String
|
65
|
-
command_as_hash_or_string
|
66
|
-
when Hash
|
67
|
-
# Allows to write
|
68
|
-
# with "pdftotext -enc UTF-8 SOURCE -" => :on_linux_and_mac_os,
|
69
|
-
# "some other command" => :on_windows
|
70
|
-
#
|
71
|
-
# On linux and mac_os platforms, it returns "pdftotext -enc UTF-8 SOURCE -",
|
72
|
-
# on windows, it returns "some other command"
|
73
|
-
#
|
74
|
-
# If commands for linux & mac os were different :
|
75
|
-
# with "some command" => :on_linux,
|
76
|
-
# "another command" => :on_mac_os,
|
77
|
-
# "yet another command" => :on_windows
|
78
|
-
#
|
79
|
-
#TODO: Make it clearer and more robust.
|
80
|
-
#NOTE: What to do when no command is defined for a given platform?
|
81
|
-
command_as_hash_or_string.invert.find{|platforms,command|
|
82
|
-
platforms.to_s.split(/_?and_?/i).collect{|on_platform| on_platform.sub(/on_/,'').to_sym}.include?(platform)
|
83
|
-
}.last.dup
|
84
|
-
else
|
85
|
-
block || raise("No command defined for this extractor: #{description}")
|
86
|
-
end
|
87
|
-
end
|
88
|
-
end
|
metadata.gz.sig
DELETED
Binary file
|