picolena 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +11 -0
- data/Manifest.txt +11 -6
- data/config/files_to_clean +12 -0
- data/config/hoe.rb +7 -3
- data/lib/picolena/{templates/config/initializers/001_prepare_and_load_custom_config.rb → config/basic.rb} +14 -19
- data/lib/picolena/config/title_and_names_and_links.yml +26 -0
- data/lib/picolena/picolena_generator.rb +8 -3
- data/lib/picolena/templates/app/controllers/application.rb +10 -3
- data/lib/picolena/templates/app/controllers/documents_controller.rb +26 -7
- data/lib/picolena/templates/app/helpers/application_helper.rb +1 -4
- data/lib/picolena/templates/app/helpers/documents_helper.rb +29 -7
- data/lib/picolena/templates/app/models/document.rb +53 -19
- data/lib/picolena/templates/app/models/finder.rb +30 -14
- data/lib/picolena/templates/app/views/documents/_document.html.haml +5 -2
- data/lib/picolena/templates/app/views/documents/cached.html.haml +8 -0
- data/lib/picolena/templates/app/views/documents/{show_content.html.haml → content.html.haml} +1 -1
- data/lib/picolena/templates/app/views/documents/show.html.haml +1 -1
- data/lib/picolena/templates/config/boot.rb +4 -4
- data/lib/picolena/templates/config/initializers/001_load_custom_config.rb +3 -0
- data/lib/picolena/templates/config/initializers/{002_indexed_dirs.rb → 002_load_indexed_dirs.rb} +1 -1
- data/lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb +8 -0
- data/lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb +5 -0
- data/lib/picolena/templates/config/routes.rb +1 -1
- data/lib/picolena/templates/lang/ui/de.yml +2 -4
- data/lib/picolena/templates/lang/ui/en.yml +2 -4
- data/lib/picolena/templates/lang/ui/es.yml +3 -5
- data/lib/picolena/templates/lang/ui/fr.yml +3 -5
- data/lib/picolena/templates/lib/core_exts.rb +1 -1
- data/lib/picolena/templates/lib/ff.rb +4 -4
- data/lib/picolena/templates/spec/controllers/documents_controller_spec.rb +9 -8
- data/lib/picolena/templates/spec/models/basic_finder_spec.rb +3 -10
- data/lib/picolena/templates/spec/models/document_spec.rb +2 -2
- data/lib/picolena/templates/spec/models/finder_spec.rb +30 -9
- data/lib/picolena/templates/spec/models/host_indexing_system_spec.rb +12 -6
- data/lib/picolena/version.rb +1 -1
- data/lib/rubigen_ext.rb +3 -3
- data/tasks/hack.rake +17 -0
- data/tasks/rspec.rake +6 -20
- data/tasks/test.rake +3 -0
- data/website/index.html +2 -2
- data.tar.gz.sig +0 -0
- metadata +24 -10
- metadata.gz.sig +0 -0
- data/lib/picolena/templates/config/initializers/003_white_list_IPs.rb +0 -8
- data/spec/picolena_spec.rb +0 -3
- data/tasks/default.rake +0 -1
data/History.txt
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
== 0.1.1 2008-04-12
|
|
2
|
+
|
|
3
|
+
* major enhancement:
|
|
4
|
+
* cache à la Google
|
|
5
|
+
|
|
6
|
+
* minor enhancements:
|
|
7
|
+
* easier configuration. All picolena-specific configuration files are now in config/custom
|
|
8
|
+
* lots of refactoring
|
|
9
|
+
|
|
10
|
+
* bug fixes
|
|
11
|
+
|
|
1
12
|
== 0.1.0 2008-04-08
|
|
2
13
|
|
|
3
14
|
* minor enhancements:
|
data/Manifest.txt
CHANGED
|
@@ -4,10 +4,13 @@ Manifest.txt
|
|
|
4
4
|
README.txt
|
|
5
5
|
Rakefile
|
|
6
6
|
bin/picolena
|
|
7
|
+
config/files_to_clean
|
|
7
8
|
config/hoe.rb
|
|
8
9
|
config/requirements.rb
|
|
9
10
|
lib/picolena/USAGE
|
|
11
|
+
lib/picolena/config/basic.rb
|
|
10
12
|
lib/picolena/config/indexed_directories.yml
|
|
13
|
+
lib/picolena/config/title_and_names_and_links.yml
|
|
11
14
|
lib/picolena/config/white_list_ip.yml
|
|
12
15
|
lib/picolena/picolena_generator.rb
|
|
13
16
|
lib/picolena/templates/MIT-LICENSE
|
|
@@ -19,19 +22,21 @@ lib/picolena/templates/app/helpers/documents_helper.rb
|
|
|
19
22
|
lib/picolena/templates/app/models/document.rb
|
|
20
23
|
lib/picolena/templates/app/models/finder.rb
|
|
21
24
|
lib/picolena/templates/app/views/documents/_document.html.haml
|
|
25
|
+
lib/picolena/templates/app/views/documents/cached.html.haml
|
|
26
|
+
lib/picolena/templates/app/views/documents/content.html.haml
|
|
22
27
|
lib/picolena/templates/app/views/documents/index.html.haml
|
|
23
28
|
lib/picolena/templates/app/views/documents/show.html.haml
|
|
24
|
-
lib/picolena/templates/app/views/documents/show_content.html.haml
|
|
25
29
|
lib/picolena/templates/app/views/layouts/application.html.haml
|
|
26
30
|
lib/picolena/templates/config/boot.rb
|
|
27
31
|
lib/picolena/templates/config/environment.rb
|
|
28
32
|
lib/picolena/templates/config/environments/development.rb
|
|
29
33
|
lib/picolena/templates/config/environments/production.rb
|
|
30
34
|
lib/picolena/templates/config/environments/test.rb
|
|
31
|
-
lib/picolena/templates/config/initializers/
|
|
32
|
-
lib/picolena/templates/config/initializers/
|
|
33
|
-
lib/picolena/templates/config/initializers/
|
|
35
|
+
lib/picolena/templates/config/initializers/001_load_custom_config.rb
|
|
36
|
+
lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb
|
|
37
|
+
lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
|
|
34
38
|
lib/picolena/templates/config/initializers/004_load_filters.rb
|
|
39
|
+
lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
|
|
35
40
|
lib/picolena/templates/config/routes.rb
|
|
36
41
|
lib/picolena/templates/lang/ui/de.yml
|
|
37
42
|
lib/picolena/templates/lang/ui/en.yml
|
|
@@ -765,13 +770,13 @@ script/destroy
|
|
|
765
770
|
script/generate
|
|
766
771
|
script/txt2html
|
|
767
772
|
setup.rb
|
|
768
|
-
spec/picolena_spec.rb
|
|
769
|
-
tasks/default.rake
|
|
770
773
|
tasks/deployment.rake
|
|
771
774
|
tasks/environment.rake
|
|
775
|
+
tasks/hack.rake
|
|
772
776
|
tasks/rdoc.rake
|
|
773
777
|
tasks/ri.rake
|
|
774
778
|
tasks/rspec.rake
|
|
779
|
+
tasks/test.rake
|
|
775
780
|
tasks/website.rake
|
|
776
781
|
website/index.html
|
|
777
782
|
website/index.txt
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
lib/picolena/templates/LICENSE
|
|
2
|
+
lib/picolena/templates/README
|
|
3
|
+
lib/picolena/templates/config/custom/picolena.rb
|
|
4
|
+
lib/picolena/templates/config/custom/indexed_directories.yml
|
|
5
|
+
lib/picolena/templates/config/custom/white_list_ip.yml
|
|
6
|
+
lib/picolena/templates/config/custom/title_and_names_and_links.yml
|
|
7
|
+
lib/picolena/templates/log
|
|
8
|
+
lib/picolena/templates/spec/test_dirs/indexed/others/bäñüßé.txt
|
|
9
|
+
lib/picolena/templates/tmp
|
|
10
|
+
lib/picolena/templates/vendor
|
|
11
|
+
lib/picolena/templates/coverage
|
|
12
|
+
lib/picolena/templates/doc
|
data/config/hoe.rb
CHANGED
|
@@ -54,17 +54,21 @@ hoe = Hoe.new(GEM_NAME, VERS) do |p|
|
|
|
54
54
|
p.url = HOMEPATH
|
|
55
55
|
p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
|
|
56
56
|
p.test_globs = ["spec/picolena_spec.rb"]
|
|
57
|
-
|
|
57
|
+
|
|
58
|
+
files_to_clean = File.readlines(File.join(File.dirname(__FILE__),'files_to_clean')).collect{|line| line.chomp}
|
|
59
|
+
p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']+files_to_clean #An array of file patterns to delete on clean.
|
|
58
60
|
|
|
59
61
|
# == Optional
|
|
60
62
|
p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
|
|
61
63
|
p.extra_deps = [ ['rails', '>= 2.0.2'],
|
|
62
|
-
#Ferret 0.11.6 is not yet available for win32
|
|
64
|
+
# Ferret 0.11.6 is not yet available for win32
|
|
65
|
+
# FIXME: How to require 0.11.6 for *nix and 0.11.5 for win32?
|
|
63
66
|
['ferret', '>= 0.11.5'],
|
|
64
67
|
['haml', '>= 1.8.2'],
|
|
65
68
|
['rubyzip', '>= 0.9.1'],
|
|
66
69
|
['rubigen', '>= 1.2.4'],
|
|
67
|
-
['paginator', '>= 1.1.0']
|
|
70
|
+
['paginator', '>= 1.1.0'],
|
|
71
|
+
['rspec', '>= 1.1.3']
|
|
68
72
|
]
|
|
69
73
|
|
|
70
74
|
#p.spec_extras = {} # A hash of extra values to set in the gemspec.
|
|
@@ -1,16 +1,3 @@
|
|
|
1
|
-
### You should not modify this file if you'd like to customize your search engine.
|
|
2
|
-
### Please modify config/custom.rb instead.
|
|
3
|
-
### A template config/custom.rb will be created the first time you launch your web server.
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
custom_config_file = File.join(RAILS_ROOT, 'config/custom.rb')
|
|
7
|
-
|
|
8
|
-
File.open(custom_config_file,'w'){|custom|
|
|
9
|
-
custom.puts <<-DEFAULT_CONF
|
|
10
|
-
### This file has been automatically generated the first time you launched your web server.
|
|
11
|
-
### You should add custom requirements here, they will be loaded everytime you restart the web server.
|
|
12
|
-
|
|
13
|
-
|
|
14
1
|
# Specify indexes path.
|
|
15
2
|
# Storage should be sufficient in order to store all indexed data.
|
|
16
3
|
IndexesSavePath=File.join(RAILS_ROOT, 'tmp/ferret_indexes/')
|
|
@@ -27,14 +14,22 @@ Globalite.language = :en
|
|
|
27
14
|
# Specify which locale should be used by Ferret
|
|
28
15
|
Ferret.locale = "en_US.UTF-8"
|
|
29
16
|
|
|
17
|
+
|
|
30
18
|
# Results per page
|
|
31
19
|
ResultsPerPage = 10
|
|
32
20
|
|
|
21
|
+
|
|
22
|
+
# Length of "probably unique id" 's
|
|
23
|
+
# Those id's are used to characterize every document, thus allowing tiny URLs in Controllers
|
|
24
|
+
# HashLength = 10
|
|
25
|
+
# Document.new("whatever.pdf").probably_unique_id => "bbuxhynait"
|
|
26
|
+
# HashLength = 20
|
|
27
|
+
# Document.new("whatever.pdf").probably_unique_id => "jfzjkyfkfkbbuxhynait"
|
|
28
|
+
# The more documents you have, the bigger HashLength should be in order to avoid collisions.
|
|
29
|
+
# It would not be wise (and specs won't pass) to specify HashLength smaller than 10.
|
|
30
|
+
HashLength = 10
|
|
31
|
+
|
|
32
|
+
|
|
33
33
|
# Specify the default Levenshtein distance when using FuzzyQuery
|
|
34
34
|
# see http://ferret.davebalmain.com/api/classes/Ferret/QueryParser.html for more information.
|
|
35
|
-
Ferret::Search::FuzzyQuery.default_min_similarity=0.6
|
|
36
|
-
DEFAULT_CONF
|
|
37
|
-
} unless File.readable?(custom_config_file)
|
|
38
|
-
|
|
39
|
-
require 'ferret'
|
|
40
|
-
require custom_config_file
|
|
35
|
+
Ferret::Search::FuzzyQuery.default_min_similarity=0.6
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
## Link to a website, located in the bottom-left corner.
|
|
2
|
+
|
|
3
|
+
website_name: Picolena v<%= version %>
|
|
4
|
+
website_url: http://picolena.devjavu.com/
|
|
5
|
+
|
|
6
|
+
## Link to a backup search engine, if no document was found for a given query.
|
|
7
|
+
|
|
8
|
+
backup_search_engine_name: Google?
|
|
9
|
+
backup_search_engine_url: http://www.google.com/search?q=
|
|
10
|
+
#backup_search_engine_name: Yahoo!
|
|
11
|
+
#backup_search_engine_url: http://search.yahoo.com/search?p=
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
## Fields that are not blank will overwrite localization.
|
|
15
|
+
## Leave them as they are if you're happy with the current localization.
|
|
16
|
+
|
|
17
|
+
# page_title: Picolena Search Engine
|
|
18
|
+
page_title:
|
|
19
|
+
# help: Help
|
|
20
|
+
help:
|
|
21
|
+
# help_file: /help/PicolenaHowTo-en.pdf
|
|
22
|
+
help_file:
|
|
23
|
+
# type_your_query_here: Type your query here
|
|
24
|
+
type_your_query_here:
|
|
25
|
+
# search: Search!
|
|
26
|
+
search:
|
|
@@ -2,7 +2,7 @@ require 'tempfile'
|
|
|
2
2
|
require 'fileutils'
|
|
3
3
|
require 'pathname'
|
|
4
4
|
|
|
5
|
-
class PicolenaGenerator < RubiGen::Base
|
|
5
|
+
class PicolenaGenerator < RubiGen::Base #:nodoc:
|
|
6
6
|
|
|
7
7
|
DEFAULT_SHEBANG = File.join(Config::CONFIG['bindir'],
|
|
8
8
|
Config::CONFIG['ruby_install_name'])
|
|
@@ -58,12 +58,15 @@ class PicolenaGenerator < RubiGen::Base
|
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
# Picolena configuration files
|
|
61
|
-
m.file '../config/white_list_ip.yml', 'config/white_list_ip.yml'
|
|
62
|
-
m.
|
|
61
|
+
m.file '../config/white_list_ip.yml', 'config/custom/white_list_ip.yml'
|
|
62
|
+
m.file '../config/basic.rb', 'config/custom/picolena.rb'
|
|
63
|
+
m.template '../config/indexed_directories.yml', 'config/custom/indexed_directories.yml', :assigns => {:directories_to_index => @directories_to_index}
|
|
64
|
+
m.template '../config/title_and_names_and_links.yml', 'config/custom/title_and_names_and_links.yml', :assigns => {:version => Picolena::VERSION::STRING}
|
|
63
65
|
|
|
64
66
|
# README, License & Rakefile
|
|
65
67
|
m.file 'MIT-LICENSE', 'LICENSE'
|
|
66
68
|
m.file '../../../README.txt', 'README'
|
|
69
|
+
m.file '../../../README.txt', 'doc/README_FOR_APP'
|
|
67
70
|
m.file 'Rakefile', 'Rakefile'
|
|
68
71
|
|
|
69
72
|
unless options[:no_index]
|
|
@@ -128,6 +131,8 @@ EOS
|
|
|
128
131
|
config
|
|
129
132
|
config/environments
|
|
130
133
|
config/initializers
|
|
134
|
+
config/custom
|
|
135
|
+
doc
|
|
131
136
|
lang/ui
|
|
132
137
|
lib
|
|
133
138
|
lib/filters
|
|
@@ -1,13 +1,18 @@
|
|
|
1
|
+
# ApplicationController just checks every incoming request according to the remote IP address.
|
|
2
|
+
#
|
|
3
|
+
# The request is sent to DocumentsController only if the IP is included in the white list.
|
|
4
|
+
# Otherwise, it returns "Access denied" 403.
|
|
5
|
+
|
|
1
6
|
class ApplicationController < ActionController::Base
|
|
2
7
|
session :disabled => true
|
|
3
8
|
before_filter :should_only_be_available_for_white_list_IPs, :except=> :access_denied
|
|
4
9
|
|
|
5
|
-
#
|
|
10
|
+
# Returns 403 status in case of an unknown remote IP address
|
|
6
11
|
def access_denied
|
|
7
|
-
render :text=>
|
|
12
|
+
render :text=>request.inspect, :status => 403
|
|
8
13
|
end
|
|
9
14
|
|
|
10
|
-
#
|
|
15
|
+
# Redirects to documents_url in case route hasn't been recognised
|
|
11
16
|
def unknown_request
|
|
12
17
|
flash[:warning]="Unknown URL"
|
|
13
18
|
redirect_to documents_url
|
|
@@ -15,6 +20,8 @@ class ApplicationController < ActionController::Base
|
|
|
15
20
|
|
|
16
21
|
private
|
|
17
22
|
|
|
23
|
+
# Tries to match remote IP address with the white list defined in config/custom/white_list_ip.yml
|
|
24
|
+
# Redirects to :access_denied if the remote IP is not white listed.
|
|
18
25
|
def should_only_be_available_for_white_list_IPs
|
|
19
26
|
unless request.remote_ip =~ WhiteListIPs
|
|
20
27
|
redirect_to :controller => 'application', :action=>'access_denied'
|
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
# Core controller of Picolena search-engine.
|
|
2
|
+
# DocumentsController
|
|
3
|
+
# - treats queries
|
|
4
|
+
# - launches searches
|
|
5
|
+
# - returns matching documents
|
|
6
|
+
# - displays document content
|
|
7
|
+
# - displays cached content.
|
|
8
|
+
|
|
1
9
|
class DocumentsController < ApplicationController
|
|
2
|
-
before_filter :check_if_valid_link, :only=> [:download, :
|
|
10
|
+
before_filter :check_if_valid_link, :only=> [:download, :content, :cached]
|
|
3
11
|
|
|
4
12
|
# Actually doesn't check anything at all. Just a redirect to show_document(query)
|
|
5
13
|
#
|
|
@@ -16,7 +24,7 @@ class DocumentsController < ApplicationController
|
|
|
16
24
|
page=params[:page]||1
|
|
17
25
|
finder=Finder.new(@query,page)
|
|
18
26
|
finder.execute!
|
|
19
|
-
pager=::Paginator.new(finder.total_hits, ResultsPerPage) do
|
|
27
|
+
pager=::Paginator.new(finder.total_hits, ResultsPerPage) do
|
|
20
28
|
finder.matching_documents
|
|
21
29
|
end
|
|
22
30
|
@matching_documents=pager.page(page)
|
|
@@ -25,22 +33,33 @@ class DocumentsController < ApplicationController
|
|
|
25
33
|
end
|
|
26
34
|
|
|
27
35
|
|
|
28
|
-
# Download the file whose
|
|
36
|
+
# Download the file whose probably_unique_id is given.
|
|
29
37
|
# If the checksum is incorrect, redirect to documents_url via no_valid_link
|
|
30
38
|
def download
|
|
31
|
-
send_file @document.complete_path
|
|
39
|
+
send_file @document.complete_path
|
|
32
40
|
end
|
|
33
41
|
|
|
34
|
-
|
|
42
|
+
# Returns the content of the document identified by probably_unique_id, as it is *now*.
|
|
43
|
+
def content
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Returns the content of the document identified by probably_unique_id, as it was at the time it was indexed.
|
|
47
|
+
# similar to Google cache.
|
|
48
|
+
def cached
|
|
35
49
|
end
|
|
36
50
|
|
|
37
51
|
private
|
|
38
52
|
|
|
53
|
+
# Returns corresponding document for any given "probably unique id"
|
|
54
|
+
# Redirects to no_valid_link if:
|
|
55
|
+
# there are more than one matching document (hash collision)
|
|
56
|
+
# there is no matching document (wrong hash)
|
|
39
57
|
def check_if_valid_link
|
|
40
|
-
@
|
|
41
|
-
@document=
|
|
58
|
+
@probably_unique_id=params[:id]
|
|
59
|
+
@document=Document.find_by_unique_id(@probably_unique_id) rescue no_valid_link
|
|
42
60
|
end
|
|
43
61
|
|
|
62
|
+
# Flashes a warning and redirects to documents_url.
|
|
44
63
|
def no_valid_link
|
|
45
64
|
flash[:warning]="no valid link"
|
|
46
65
|
redirect_to documents_url
|
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
module DocumentsHelper
|
|
2
|
+
# Returns true if no document has been found for a given query.
|
|
3
|
+
def nothing_found?
|
|
4
|
+
@matching_documents.nil? or @matching_documents.entries.empty?
|
|
5
|
+
end
|
|
6
|
+
|
|
7
|
+
# Very basic pagination.
|
|
8
|
+
# Provides links to Next, Prev and FirstPage when needed.
|
|
2
9
|
def should_paginate(page,query)
|
|
3
10
|
[(link_to("←←", :action => :show, :id => query, :page => 1) if page.number>2),
|
|
4
11
|
(link_to("←", :action => :show, :id => query, :page => page.prev.number) if page.prev?),
|
|
5
12
|
(link_to("→", :action => :show, :id => query, :page => page.next.number) if page.next?)].compact.join(" | ")
|
|
6
13
|
end
|
|
7
14
|
|
|
15
|
+
# Returns a localized sentence like "Results 1-10 of 12 for Zimbabwe (0.472s)" or
|
|
16
|
+
# "Résultats 1-2 parmi 2 pour whatever (0.012s)"
|
|
8
17
|
def describe_results(page, total_hits, dt, query)
|
|
9
18
|
[:results.l,
|
|
10
19
|
content_tag(:strong,"#{page.first_item_number}-#{page.last_item_number}"),
|
|
@@ -16,20 +25,25 @@ module DocumentsHelper
|
|
|
16
25
|
].join(' ')
|
|
17
26
|
end
|
|
18
27
|
|
|
28
|
+
# Returns the time needed to treat the query and launch the search, with millisecond precision: (0.472s)
|
|
19
29
|
def show_time_needed(dt)
|
|
20
30
|
content_tag(:small,'('<<number_with_precision(dt,3)<<'s)')
|
|
21
31
|
end
|
|
22
|
-
|
|
32
|
+
|
|
33
|
+
# When possible, highlights content of the document that match the query.
|
|
23
34
|
def highlight_matching_content(document)
|
|
24
35
|
content_tag(:ul,document.matching_content.collect{|sentence|
|
|
25
36
|
content_tag(:li,h(sentence).gsub(/<<(.*?)>>/,'<strong>\1</strong>').gsub(/\v|\f/,''))
|
|
26
37
|
}) if document.matching_content
|
|
27
38
|
end
|
|
28
39
|
|
|
29
|
-
|
|
30
|
-
|
|
40
|
+
# Returns icon and filename for any given document.
|
|
41
|
+
def icon_and_filename_for(document)
|
|
42
|
+
[icon_for(document.extname),document.filename].join(" ")
|
|
31
43
|
end
|
|
32
44
|
|
|
45
|
+
# Returns the location (if available) of the filetype icon.
|
|
46
|
+
# TODO: Move this hash to a .yml config file.
|
|
33
47
|
def icon_for(filetype)
|
|
34
48
|
pic_for_exts={
|
|
35
49
|
:xls=>%w{xls xlsx ods},
|
|
@@ -53,18 +67,26 @@ module DocumentsHelper
|
|
|
53
67
|
image_tag("icons/#{pic.first}.png") if pic
|
|
54
68
|
end
|
|
55
69
|
|
|
56
|
-
|
|
57
|
-
|
|
70
|
+
# Returns a link to a backup search engine that could maybe find more results for the same query.
|
|
71
|
+
def link_to_backup_search_engine(query)
|
|
72
|
+
link_to :backup_search_engine_name.l, :backup_search_engine_url.l<<query
|
|
58
73
|
end
|
|
59
74
|
|
|
75
|
+
# For any indexed document, returns a link to its containing directory.
|
|
60
76
|
def link_to_containing_directory(document)
|
|
61
77
|
link_name=image_tag('icons/remote_folder.png')<<' '<<content_tag(:small,document.alias_path)
|
|
62
78
|
link_to link_name, document.alias_path, :target=>'_blank'
|
|
63
79
|
end
|
|
64
80
|
|
|
81
|
+
# For any indexed document, returns a link to show its content.
|
|
65
82
|
def link_to_plain_text_content(document)
|
|
66
|
-
return unless document.supported?
|
|
67
83
|
link_name=image_tag('icons/plain_text_small.png')<<' '<<content_tag(:small,:text_content.l)
|
|
68
|
-
link_to link_name,
|
|
84
|
+
link_to link_name, content_document_path(document.probably_unique_id)
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# For any indexed document, returns a link to show its cached content.
|
|
88
|
+
def link_to_cached_content(document)
|
|
89
|
+
link_name="("<<content_tag(:small,:cached.l)<<")"
|
|
90
|
+
link_to link_name, cached_document_path(document.probably_unique_id)
|
|
69
91
|
end
|
|
70
92
|
end
|
|
@@ -1,64 +1,98 @@
|
|
|
1
|
+
# Document class retrieves information from filesystem and the index for any given document.
|
|
1
2
|
class Document
|
|
2
3
|
attr_reader :complete_path
|
|
3
|
-
attr_accessor :user, :score, :matching_content
|
|
4
|
+
attr_accessor :user, :score, :matching_content, :index_id
|
|
4
5
|
|
|
5
|
-
def initialize(
|
|
6
|
-
@complete_path
|
|
6
|
+
def initialize(path)
|
|
7
|
+
#To ensure @complete_path is an absolute path.
|
|
8
|
+
@complete_path=File.expand_path(path)
|
|
7
9
|
validate_existence_of_file
|
|
8
10
|
validate_in_indexed_directory
|
|
9
11
|
end
|
|
10
12
|
|
|
11
|
-
|
|
12
|
-
id
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
def to_s
|
|
16
|
-
filename
|
|
17
|
-
end
|
|
13
|
+
alias_method :to_param, :id
|
|
18
14
|
|
|
19
15
|
#Delegating properties to File::method_name(complete_path)
|
|
20
16
|
[:dirname, :basename, :extname, :size?, :file?, :read, :ext_as_sym].each{|method_name|
|
|
21
17
|
define_method(method_name){File.send(method_name,complete_path)}
|
|
22
18
|
}
|
|
23
19
|
alias_method :size, :size?
|
|
24
|
-
alias_method :content, :read
|
|
25
20
|
alias_method :filename, :basename
|
|
21
|
+
alias_method :to_s, :basename
|
|
26
22
|
|
|
23
|
+
# Returns filename without extension
|
|
24
|
+
# "buildings.odt" => "buildings"
|
|
27
25
|
def basename
|
|
28
26
|
filename.chomp(extname)
|
|
29
27
|
end
|
|
30
|
-
|
|
31
|
-
def absolute_dirname
|
|
32
|
-
Pathname.new(dirname).realpath.to_s
|
|
33
|
-
end
|
|
34
28
|
|
|
29
|
+
# End users should not always know where documents are stored internally.
|
|
30
|
+
# An alias path can be specified in config/indexed_directories.yml
|
|
31
|
+
#
|
|
32
|
+
# For example, with:
|
|
33
|
+
# "/media/wiki_dump/" : "http://www.mycompany.com/wiki/"
|
|
34
|
+
#
|
|
35
|
+
# The documents
|
|
36
|
+
# "/media/wiki_dump/organigram.odp"
|
|
37
|
+
# will be displayed as being:
|
|
38
|
+
# "http://www.mycompany.com/wiki/organigram.odp"
|
|
35
39
|
def alias_path
|
|
36
40
|
original_dir=indexed_directory
|
|
37
41
|
alias_dir=IndexedDirectories[original_dir]
|
|
38
|
-
|
|
42
|
+
dirname.sub(original_dir,alias_dir)
|
|
39
43
|
end
|
|
40
44
|
|
|
41
|
-
|
|
42
|
-
|
|
45
|
+
# Returns an id for this document.
|
|
46
|
+
# This id will be used in Controllers in order to get tiny urls.
|
|
47
|
+
# Since it's a base26 hash of the absolute filename, it can only be "probably unique".
|
|
48
|
+
# For a huge number of indexed documents, it would be wise to increase HashLength in config/custom/picolena.rb
|
|
49
|
+
def probably_unique_id
|
|
50
|
+
@probably_unique_id||=complete_path.base26_hash
|
|
43
51
|
end
|
|
44
52
|
|
|
53
|
+
# Returns true iff some Filter has been defined to convert it to plain text.
|
|
54
|
+
# Document.new("presentation.pdf").supported? => true
|
|
55
|
+
# Document.new("presentation.some_weird_extension").supported? => false
|
|
45
56
|
def supported?
|
|
46
57
|
PlainText.supported_extensions.include?(self.ext_as_sym)
|
|
47
58
|
end
|
|
48
59
|
|
|
60
|
+
# Retrieves content as it is *now*.
|
|
49
61
|
def content
|
|
50
62
|
PlainText.extract_content_from(complete_path)
|
|
51
63
|
end
|
|
52
64
|
|
|
65
|
+
# Cache à la Google.
|
|
66
|
+
# Returns content as it was at the time it was indexed.
|
|
67
|
+
def cached
|
|
68
|
+
get_index_id! unless index_id
|
|
69
|
+
Finder.index[index_id][:content]
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Returns the last modification date before the document got indexed.
|
|
73
|
+
# Useful to know how old a document is, and to which version the cache corresponds.
|
|
74
|
+
def date
|
|
75
|
+
get_index_id! unless index_id
|
|
76
|
+
Finder.index[index_id][:date].sub(/(\d{4})(\d{2})(\d{2})/,'\1-\2-\3')
|
|
77
|
+
end
|
|
78
|
+
|
|
53
79
|
private
|
|
54
80
|
|
|
81
|
+
def get_index_id!
|
|
82
|
+
@index_id = Document.find_by_unique_id(probably_unique_id).index_id
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def self.find_by_unique_id(some_id)
|
|
86
|
+
Finder.new("probably_unique_id:"<<some_id).matching_document
|
|
87
|
+
end
|
|
88
|
+
|
|
55
89
|
def in_indexed_directory?
|
|
56
90
|
!indexed_directory.nil?
|
|
57
91
|
end
|
|
58
92
|
|
|
59
93
|
def indexed_directory
|
|
60
94
|
IndexedDirectories.keys.find{|indexed_dir|
|
|
61
|
-
|
|
95
|
+
dirname.starts_with?(indexed_dir)
|
|
62
96
|
}
|
|
63
97
|
end
|
|
64
98
|
|
|
@@ -1,14 +1,21 @@
|
|
|
1
1
|
require 'ff'
|
|
2
2
|
|
|
3
3
|
class Finder
|
|
4
|
-
|
|
4
|
+
#FIXME: Should not use all those class methods to access index.
|
|
5
|
+
|
|
6
|
+
attr_reader :query
|
|
7
|
+
|
|
8
|
+
def self.index
|
|
9
|
+
# caching index @@index ||=
|
|
10
|
+
# causes ferret-0.11.6/lib/ferret/index.rb:768: [BUG] Segmentation fault
|
|
11
|
+
Ferret::Index::Index.new(:path => IndexSavePath, :analyzer=>Analyzer)
|
|
12
|
+
end
|
|
5
13
|
|
|
6
14
|
def initialize(raw_query,page=1,results_per_page=ResultsPerPage)
|
|
7
15
|
query_parser = Ferret::QueryParser.new(:fields => [:content, :file, :basename, :filetype, :date], :or_default => false, :analyzer=>Analyzer)
|
|
8
16
|
@query = query_parser.parse(convert_to_english(raw_query))
|
|
9
17
|
@raw_query= raw_query
|
|
10
18
|
Finder.ensure_that_index_exists_on_disk
|
|
11
|
-
@index = Ferret::Index::Index.new(:path => IndexSavePath, :analyzer=>Analyzer)
|
|
12
19
|
@per_page=results_per_page
|
|
13
20
|
@offset=(page.to_i-1)*results_per_page
|
|
14
21
|
validate_that_index_has_documents
|
|
@@ -18,16 +25,17 @@ class Finder
|
|
|
18
25
|
@matching_documents=[]
|
|
19
26
|
start=Time.now
|
|
20
27
|
begin
|
|
21
|
-
top_docs=index.search(query, :limit => @per_page, :offset=>@offset)
|
|
28
|
+
top_docs=Finder.index.search(query, :limit => @per_page, :offset=>@offset)
|
|
22
29
|
top_docs.hits.each{|hit|
|
|
23
|
-
|
|
30
|
+
index_id,score=hit.doc,hit.score
|
|
24
31
|
begin
|
|
25
|
-
found_doc=Document.new(index[
|
|
26
|
-
found_doc.matching_content=index.highlight(query,
|
|
32
|
+
found_doc=Document.new(Finder.index[index_id][:complete_path])
|
|
33
|
+
found_doc.matching_content=Finder.index.highlight(query, index_id,
|
|
27
34
|
:field => :content, :excerpt_length => 80,
|
|
28
35
|
:pre_tag => "<<", :post_tag => ">>"
|
|
29
36
|
) unless @raw_query=~/^\*+\.\w*$/
|
|
30
37
|
found_doc.score=score
|
|
38
|
+
found_doc.index_id=index_id
|
|
31
39
|
@matching_documents<<found_doc
|
|
32
40
|
rescue Errno::ENOENT
|
|
33
41
|
#"File has been moved/deleted!"
|
|
@@ -37,14 +45,20 @@ class Finder
|
|
|
37
45
|
@time_needed=Time.now-start
|
|
38
46
|
@total_hits=top_docs.total_hits
|
|
39
47
|
ensure
|
|
40
|
-
index.close
|
|
48
|
+
#index.close
|
|
41
49
|
end
|
|
42
50
|
end
|
|
43
51
|
|
|
52
|
+
# Returns true if it has been executed.
|
|
44
53
|
def executed?
|
|
45
54
|
@executed
|
|
46
55
|
end
|
|
47
56
|
|
|
57
|
+
# To ensure that
|
|
58
|
+
# matching_documents
|
|
59
|
+
# total_hits
|
|
60
|
+
# time_needed
|
|
61
|
+
# methods are called only after the index has been searched.
|
|
48
62
|
[:matching_documents, :total_hits, :time_needed].each{|attribute_name|
|
|
49
63
|
define_method(attribute_name){
|
|
50
64
|
execute! unless executed?
|
|
@@ -52,18 +66,19 @@ class Finder
|
|
|
52
66
|
}
|
|
53
67
|
}
|
|
54
68
|
|
|
69
|
+
# Returns true if the index exists.
|
|
55
70
|
def self.has_index?
|
|
56
71
|
index_filename and File.exists?(index_filename)
|
|
57
72
|
end
|
|
58
73
|
|
|
74
|
+
# Returns true if there's at least one document indexed.
|
|
59
75
|
def has_documents?
|
|
60
|
-
index.size>0
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
def self.up_to_date?
|
|
64
|
-
IndexedDirectories.keys.all?{|dir| File.mtime(index_filename) > File.mtime(dir)}
|
|
76
|
+
Finder.index.size>0
|
|
65
77
|
end
|
|
66
78
|
|
|
79
|
+
# Returns matching document for any given query, only if
|
|
80
|
+
# exactly one document is found.
|
|
81
|
+
# Raises otherwise.
|
|
67
82
|
def matching_document
|
|
68
83
|
case matching_documents.size
|
|
69
84
|
when 0
|
|
@@ -77,7 +92,8 @@ class Finder
|
|
|
77
92
|
|
|
78
93
|
private
|
|
79
94
|
|
|
80
|
-
|
|
95
|
+
# Convert query keywords to English so they can be parsed by Ferret.
|
|
96
|
+
def convert_to_english(query)
|
|
81
97
|
to_en={
|
|
82
98
|
/\b#{:AND.l}\b/=>'AND',
|
|
83
99
|
/\b#{:OR.l}\b/=>'OR',
|
|
@@ -87,7 +103,7 @@ class Finder
|
|
|
87
103
|
/#{:date.l}:/ => 'date:',
|
|
88
104
|
/\b#{:LIKE.l}\s+(\S+)/=>'\1~'
|
|
89
105
|
}
|
|
90
|
-
to_en.inject(
|
|
106
|
+
to_en.inject(query){|mem,non_english_to_english_keyword|
|
|
91
107
|
mem.gsub(*non_english_to_english_keyword)
|
|
92
108
|
}
|
|
93
109
|
end
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
%h2
|
|
2
|
-
=link_to icon_and_filename_for(document), download_document_path(document.
|
|
2
|
+
=link_to icon_and_filename_for(document), download_document_path(document.probably_unique_id)
|
|
3
3
|
%small=number_to_percentage(document.score*100, :precision=>1)
|
|
4
4
|
=highlight_matching_content(document)
|
|
5
5
|
%p=link_to_containing_directory(document)
|
|
6
|
-
|
|
6
|
+
- if document.supported?
|
|
7
|
+
%p
|
|
8
|
+
=link_to_plain_text_content(document)
|
|
9
|
+
=link_to_cached_content(document)
|
|
7
10
|
%hr/
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
RAILS_ROOT = "#{File.dirname(__FILE__)}/.." unless defined?(RAILS_ROOT)
|
|
5
5
|
|
|
6
|
-
module Rails
|
|
6
|
+
module Rails #:nodoc:
|
|
7
7
|
class << self
|
|
8
8
|
def boot!
|
|
9
9
|
unless booted?
|
|
@@ -34,20 +34,20 @@ module Rails
|
|
|
34
34
|
end
|
|
35
35
|
end
|
|
36
36
|
|
|
37
|
-
class Boot
|
|
37
|
+
class Boot #:nodoc:
|
|
38
38
|
def run
|
|
39
39
|
load_initializer
|
|
40
40
|
Rails::Initializer.run(:set_load_path)
|
|
41
41
|
end
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
-
class VendorBoot < Boot
|
|
44
|
+
class VendorBoot < Boot #:nodoc:
|
|
45
45
|
def load_initializer
|
|
46
46
|
require "#{RAILS_ROOT}/vendor/rails/railties/lib/initializer"
|
|
47
47
|
end
|
|
48
48
|
end
|
|
49
49
|
|
|
50
|
-
class GemBoot < Boot
|
|
50
|
+
class GemBoot < Boot #:nodoc:
|
|
51
51
|
def load_initializer
|
|
52
52
|
self.class.load_rubygems
|
|
53
53
|
load_rails_gem
|
data/lib/picolena/templates/config/initializers/{002_indexed_dirs.rb → 002_load_indexed_dirs.rb}
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#Loading directories to be indexed
|
|
2
|
-
indexed_dir_config_file=
|
|
2
|
+
indexed_dir_config_file='config/custom/indexed_directories.yml'
|
|
3
3
|
IndexedDirectories={}
|
|
4
4
|
YAML.load_file(indexed_dir_config_file)[RAILS_ENV].each_pair{|abs_or_rel_path, alias_path|
|
|
5
5
|
IndexedDirectories[Pathname(abs_or_rel_path).realpath.to_s]=alias_path
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
#Deny all, Allow only IPs described in config/custom/white_list_ip.yml
|
|
2
|
+
white_list_ip_config_file='config/custom/white_list_ip.yml'
|
|
3
|
+
WhiteListIPs=Regexp.new(
|
|
4
|
+
"^("<<
|
|
5
|
+
YAML.load_file(white_list_ip_config_file)["Allow"].collect{|ip|
|
|
6
|
+
ip.downcase.include?("all") ? /.*/ : Regexp.escape(ip)
|
|
7
|
+
}.join("|")<<")"
|
|
8
|
+
) rescue /^(127\.0\.0\.1|0\.0\.0\.0)/
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
ActionController::Routing::Routes.draw do |map|
|
|
2
|
-
map.resources :documents, :collection=>{:check_query=>:post}, :member=>{:download=>:get, :
|
|
2
|
+
map.resources :documents, :collection=>{:check_query=>:post}, :member=>{:download=>:get, :content=>:get, :cached=>:get}
|
|
3
3
|
map.connect 'documents/:id', :controller=>'documents', :action=>'show', :id => /.*/
|
|
4
4
|
map.connect 'access_denied', :controller=> 'application', :action => 'access_denied'
|
|
5
5
|
map.connect "*anything", :controller=>'application', :action => 'unknown_request'
|
|
@@ -9,10 +9,8 @@ results: Ergebnisse
|
|
|
9
9
|
of: von
|
|
10
10
|
for: für
|
|
11
11
|
text_content: Text-Inhalt
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
website_name: Picolena
|
|
15
|
-
website_url: http://picolena.devjavu.com/
|
|
12
|
+
cached: cached
|
|
13
|
+
as_it_was_indexed_on: wie am
|
|
16
14
|
|
|
17
15
|
## Boolean operators
|
|
18
16
|
AND: UND
|
|
@@ -9,10 +9,8 @@ results: Results
|
|
|
9
9
|
of: of
|
|
10
10
|
for: for
|
|
11
11
|
text_content: Text content
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
website_name: Picolena
|
|
15
|
-
website_url: http://picolena.devjavu.com/
|
|
12
|
+
cached: cached
|
|
13
|
+
as_it_was_indexed_on: as it was indexed on
|
|
16
14
|
|
|
17
15
|
## Boolean operators
|
|
18
16
|
AND: AND
|
|
@@ -8,11 +8,9 @@ nothing_found_for: Ningún documento encontrado por
|
|
|
8
8
|
results: Resultados
|
|
9
9
|
of: de
|
|
10
10
|
for: por
|
|
11
|
-
text_content:
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
website_name: Picolena
|
|
15
|
-
website_url: http://picolena.devjavu.com/
|
|
11
|
+
text_content: ver contenido
|
|
12
|
+
cached: caché
|
|
13
|
+
as_it_was_indexed_on: como fue indexado el
|
|
16
14
|
|
|
17
15
|
## Boolean operators
|
|
18
16
|
AND: Y
|
|
@@ -8,11 +8,9 @@ nothing_found_for: Rien trouvé pour
|
|
|
8
8
|
results: Résultats
|
|
9
9
|
of: parmi
|
|
10
10
|
for: pour
|
|
11
|
-
text_content: contenu
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
website_name: Picolena
|
|
15
|
-
website_url: http://picolena.devjavu.com/
|
|
11
|
+
text_content: voir le contenu
|
|
12
|
+
cached: cached
|
|
13
|
+
as_it_was_indexed_on: comme indexé le
|
|
16
14
|
|
|
17
15
|
## Boolean operators
|
|
18
16
|
AND: ET
|
|
@@ -17,7 +17,7 @@ end
|
|
|
17
17
|
|
|
18
18
|
class String
|
|
19
19
|
# Creates a "probably unique" id with the desired length, composed only of lowercase letters.
|
|
20
|
-
def base26_hash(length=
|
|
20
|
+
def base26_hash(length=HashLength)
|
|
21
21
|
Digest::MD5.hexdigest(self).to_i(16).to_s(26).tr('0-9a-p', 'a-z')[-length,length]
|
|
22
22
|
end
|
|
23
23
|
end
|
|
@@ -20,11 +20,11 @@ def index_file(index, filename, mime_type=nil)
|
|
|
20
20
|
complete_path=File.expand_path(filename)
|
|
21
21
|
fields = {
|
|
22
22
|
:complete_path=> complete_path,
|
|
23
|
-
:
|
|
23
|
+
:probably_unique_id => complete_path.base26_hash,
|
|
24
24
|
:file => File.basename(filename),
|
|
25
25
|
:basename => File.basename(filename, File.extname(filename)).gsub(/_/,' '),
|
|
26
26
|
:filetype => File.extname(filename),
|
|
27
|
-
:date => File.mtime(filename).strftime("%Y%m%d")
|
|
27
|
+
:date => File.mtime(filename).strftime("%Y%m%d%H%M")
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
if mime_type then
|
|
@@ -112,6 +112,6 @@ def add_fields(index)
|
|
|
112
112
|
index.field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
|
|
113
113
|
index.field_infos.add_field(:file, :store => :no, :index => :yes, :boost => 1.5)
|
|
114
114
|
index.field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
|
|
115
|
-
index.field_infos.add_field(:date, :store=>:
|
|
116
|
-
index.field_infos.add_field(:
|
|
115
|
+
index.field_infos.add_field(:date, :store=>:yes, :index=>:yes)
|
|
116
|
+
index.field_infos.add_field(:probably_unique_id, :store=>:no, :index=>:yes)
|
|
117
117
|
end
|
|
@@ -8,17 +8,18 @@ describe "DocumentsController called from unknown IP" do
|
|
|
8
8
|
end
|
|
9
9
|
|
|
10
10
|
it "should deny access" do
|
|
11
|
-
|
|
11
|
+
# Displays a warning otherwise
|
|
12
|
+
Object.send(:remove_const, :WhiteListIPs) && WhiteListIPs=/Something that won't match/
|
|
12
13
|
get 'index'
|
|
13
14
|
response.should be_redirect
|
|
14
15
|
response.should redirect_to(:controller=>'application', :action=>'access_denied')
|
|
15
|
-
WhiteListIPs=/^0\.0\.0\.0/
|
|
16
|
+
Object.send(:remove_const, :WhiteListIPs) && WhiteListIPs=/^0\.0\.0\.0/
|
|
16
17
|
get 'index'
|
|
17
18
|
response.should be_success
|
|
18
19
|
end
|
|
19
20
|
|
|
20
21
|
after(:all) do
|
|
21
|
-
WhiteListIPs=@backup
|
|
22
|
+
Object.send(:remove_const, :WhiteListIPs) && WhiteListIPs=@backup
|
|
22
23
|
end
|
|
23
24
|
end
|
|
24
25
|
|
|
@@ -96,18 +97,18 @@ describe DocumentsController do
|
|
|
96
97
|
response.should be_success
|
|
97
98
|
assigns[:matching_documents].entries.should_not be_empty
|
|
98
99
|
d=assigns[:matching_documents].entries.first
|
|
99
|
-
get 'download', :id=>d.
|
|
100
|
+
get 'download', :id=>d.probably_unique_id
|
|
100
101
|
assigns[:document].complete_path == d.complete_path
|
|
101
102
|
response.should be_success
|
|
102
103
|
end
|
|
103
104
|
|
|
104
105
|
it "GET 'download' should redirect if wrong id" do
|
|
105
|
-
|
|
106
|
-
get 'download', :id=>
|
|
106
|
+
probably_unique_id="Not a document".base26_hash
|
|
107
|
+
get 'download', :id=>probably_unique_id
|
|
107
108
|
response.should be_redirect
|
|
108
109
|
response.should redirect_to(documents_url)
|
|
109
|
-
|
|
110
|
-
get 'download', :id=>
|
|
110
|
+
probably_unique_id='Whatever'
|
|
111
|
+
get 'download', :id=>probably_unique_id
|
|
111
112
|
response.should be_redirect
|
|
112
113
|
response.should redirect_to(documents_url)
|
|
113
114
|
end
|
|
@@ -17,7 +17,7 @@ describe "Finder without index on disk" do
|
|
|
17
17
|
IndexedDirectories.replace({'spec/test_dirs/indexed/just_one_doc'=>'//justonedoc/'})
|
|
18
18
|
lambda {@finder_with_new_index=Finder.new("test moi")}.should change(Finder, :has_index?).from(false).to(true)
|
|
19
19
|
File.exists?(File.join(@new_index_path,'_0.cfs')).should be_true
|
|
20
|
-
|
|
20
|
+
Finder.index.size.should >0
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
it "should raise if index is still empty after trying to create it" do
|
|
@@ -75,21 +75,14 @@ describe "Basic Finder" do
|
|
|
75
75
|
@finder=Finder.new("some query")
|
|
76
76
|
lambda {@finder.execute!}.should change(@finder, :executed?).from(false).to(true)
|
|
77
77
|
end
|
|
78
|
-
|
|
79
|
-
it "should know if index is up to date" do
|
|
80
|
-
Finder.has_index?.should be_true
|
|
81
|
-
Finder.should respond_to(:up_to_date?)
|
|
82
|
-
Finder.should be_up_to_date
|
|
83
|
-
end
|
|
84
|
-
|
|
78
|
+
|
|
85
79
|
it "should not warn anything if index is up to date"
|
|
86
80
|
|
|
87
81
|
it "should warn if index is not up to date"
|
|
88
82
|
|
|
89
83
|
fields.each_pair do |description,field_name|
|
|
90
84
|
it "should index #{description} as :#{field_name}" do
|
|
91
|
-
|
|
92
|
-
@finder.index.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
|
|
85
|
+
Finder.index.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
|
|
93
86
|
end
|
|
94
87
|
end
|
|
95
88
|
|
|
@@ -2,9 +2,9 @@ require File.dirname(__FILE__) + '/../spec_helper'
|
|
|
2
2
|
|
|
3
3
|
basic_pdf_attribute={
|
|
4
4
|
:size=>9380,
|
|
5
|
-
:dirname=>'spec/test_dirs/indexed/basic',
|
|
5
|
+
:dirname=>File.join(RAILS_ROOT, 'spec/test_dirs/indexed/basic'),
|
|
6
6
|
:basename=>'basic',
|
|
7
|
-
:complete_path=>'spec/test_dirs/indexed/basic/basic.pdf',
|
|
7
|
+
:complete_path=>File.join(RAILS_ROOT, '/spec/test_dirs/indexed/basic/basic.pdf'),
|
|
8
8
|
:extname=>'.pdf',
|
|
9
9
|
:filename=>'basic.pdf'
|
|
10
10
|
}
|
|
@@ -1,13 +1,19 @@
|
|
|
1
1
|
require File.dirname(__FILE__) + '/../spec_helper'
|
|
2
2
|
|
|
3
|
+
def revert_changes!(file,content)
|
|
4
|
+
File.open(file,'w'){|might_have_been_modified|
|
|
5
|
+
might_have_been_modified.write content
|
|
6
|
+
}
|
|
7
|
+
end
|
|
8
|
+
|
|
3
9
|
describe Finder do
|
|
4
10
|
before(:all) do
|
|
5
11
|
# SVN doesn't like non-ascii filenames.
|
|
6
|
-
|
|
7
|
-
weird_file.write "just to know if files are indexed with utf8 filenames"
|
|
8
|
-
}
|
|
9
|
-
|
|
12
|
+
revert_changes!('spec/test_dirs/indexed/others/bäñüßé.txt',"just to know if files are indexed with utf8 filenames")
|
|
10
13
|
|
|
14
|
+
# To be sure this file has the right content
|
|
15
|
+
revert_changes!("spec/test_dirs/indexed/others/placeholder.txt","Absorption and Adsorption cooling machines!!!")
|
|
16
|
+
|
|
11
17
|
once_upon_a_time=Time.local(1982,2,16,20,42)
|
|
12
18
|
a_bit_later=Time.local(1983,12,9,9)
|
|
13
19
|
nineties=Time.local(1990)
|
|
@@ -41,10 +47,9 @@ describe Finder do
|
|
|
41
47
|
end
|
|
42
48
|
|
|
43
49
|
it "should give a boost to basename, filename and filetype in index" do
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
@finder.index.field_infos[:filetype].boost.should > 1.0
|
|
50
|
+
Finder.index.field_infos[:basename].boost.should > 1.0
|
|
51
|
+
Finder.index.field_infos[:file].boost.should > 1.0
|
|
52
|
+
Finder.index.field_infos[:filetype].boost.should > 1.0
|
|
48
53
|
end
|
|
49
54
|
|
|
50
55
|
it "should also index unreadable files with known mimetypes" do
|
|
@@ -85,7 +90,7 @@ describe Finder do
|
|
|
85
90
|
|
|
86
91
|
it "should find documents according to their modification date" do
|
|
87
92
|
Finder.new("date:<1982").matching_documents.should be_empty
|
|
88
|
-
Finder.new("19831209").matching_document.basename.should == "office2003-word-template"
|
|
93
|
+
Finder.new("19831209*").matching_document.basename.should == "office2003-word-template"
|
|
89
94
|
Finder.new("date:<1983").matching_document.filename.should == "basic.pdf"
|
|
90
95
|
Finder.new("date:>=1989 AND date:<=1992").matching_document.filename.should == "placeholder.txt"
|
|
91
96
|
end
|
|
@@ -195,6 +200,22 @@ describe Finder do
|
|
|
195
200
|
Finder.new("Thumbs.db").matching_documents.should be_empty
|
|
196
201
|
Finder.new("filetype:db").matching_documents.should_not be_empty
|
|
197
202
|
end
|
|
203
|
+
|
|
204
|
+
it "should keep content cached" do
|
|
205
|
+
filename = "spec/test_dirs/indexed/others/placeholder.txt"
|
|
206
|
+
content_before = "Absorption and Adsorption cooling machines!!!"
|
|
207
|
+
some_doc=Document.new(filename)
|
|
208
|
+
some_doc.content.should == content_before
|
|
209
|
+
File.open(filename,'a'){|doc|
|
|
210
|
+
doc.write("This line should not be indexed. It shouldn't be found in cache")
|
|
211
|
+
}
|
|
212
|
+
some_doc.content.should_not == content_before
|
|
213
|
+
some_doc.cached.should == content_before
|
|
214
|
+
end
|
|
215
|
+
|
|
216
|
+
after(:all) do
|
|
217
|
+
revert_changes!("spec/test_dirs/indexed/others/placeholder.txt","Absorption and Adsorption cooling machines!!!")
|
|
218
|
+
end
|
|
198
219
|
|
|
199
220
|
# Not sure about this spec!
|
|
200
221
|
# English, or German?
|
|
@@ -7,17 +7,23 @@ describe "Host indexing system" do
|
|
|
7
7
|
end
|
|
8
8
|
end
|
|
9
9
|
|
|
10
|
-
it "should know which IP addresses are allowed (config/white_list_ip.yml)" do
|
|
11
|
-
File.should be_readable('config/white_list_ip.yml')
|
|
10
|
+
it "should know which IP addresses are allowed (config/custom/white_list_ip.yml)" do
|
|
11
|
+
File.should be_readable('config/custom/white_list_ip.yml')
|
|
12
12
|
end
|
|
13
13
|
|
|
14
|
-
it "should know which directories are to be indexed (config/indexed_directories.yml)" do
|
|
15
|
-
File.should be_readable('config/indexed_directories.yml')
|
|
14
|
+
it "should know which directories are to be indexed (config/custom/indexed_directories.yml)" do
|
|
15
|
+
File.should be_readable('config/custom/indexed_directories.yml')
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
# it would probably take ages to find a string whose hash == "picolena" :(
|
|
19
18
|
it "should be able to calculate base26 hash from strings" do
|
|
19
|
+
"test_dirs/indexed/010/decrepito.pdf".base26_hash(5).should == "rails"
|
|
20
|
+
"test_dirs/indexed/migrations/000_restreins.rb".base26_hash(5).should == "ricou"
|
|
21
|
+
# it would probably take ages to find a string whose hash == "picolena" :(
|
|
20
22
|
"test_dirs/indexed/1148/plots.odt".base26_hash(8).should == "picolehn"
|
|
21
|
-
"whatever.pdf".base26_hash.should == "bbuxhynait"
|
|
23
|
+
"whatever.pdf".base26_hash(10).should == "bbuxhynait"
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
it "should not use too small a hash for Document#probably_unique_id" do
|
|
27
|
+
HashLength.should_not < 10
|
|
22
28
|
end
|
|
23
29
|
end
|
data/lib/picolena/version.rb
CHANGED
data/lib/rubigen_ext.rb
CHANGED
data/tasks/hack.rake
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
desc 'Create development picolena structure inside lib/picolena/templates'
|
|
2
|
+
task :lets_hack do
|
|
3
|
+
picolena_root=File.join(File.dirname(__FILE__),'..')
|
|
4
|
+
Dir.chdir(picolena_root){
|
|
5
|
+
system("ruby bin/picolena lib/picolena/templates/spec/test_dirs --skip --no-index --no-spec --destination=lib/picolena/templates")
|
|
6
|
+
}
|
|
7
|
+
puts <<-EXPLAIN
|
|
8
|
+
|
|
9
|
+
You now have a complete picolena installation in:
|
|
10
|
+
#{File.expand_path(File.join(File.dirname(__FILE__),'../lib/picolena/templates'))}
|
|
11
|
+
|
|
12
|
+
You can now hack and submit patches!
|
|
13
|
+
|
|
14
|
+
Once done, you can remove those files by typing:
|
|
15
|
+
rake clean
|
|
16
|
+
EXPLAIN
|
|
17
|
+
end
|
data/tasks/rspec.rake
CHANGED
|
@@ -1,20 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
end
|
|
7
|
-
begin
|
|
8
|
-
require 'spec/rake/spectask'
|
|
9
|
-
rescue LoadError
|
|
10
|
-
puts <<-EOS
|
|
11
|
-
To use rspec for testing you must install rspec gem:
|
|
12
|
-
gem install rspec
|
|
13
|
-
EOS
|
|
14
|
-
exit(0)
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
desc "Run the specs under spec/models"
|
|
18
|
-
Spec::Rake::SpecTask.new do |t|
|
|
19
|
-
t.spec_files = FileList['spec/**/*_spec.rb']
|
|
20
|
-
end
|
|
1
|
+
desc 'Create a temporary picolena structure and launch specs from it'
|
|
2
|
+
task :spec => :clean do
|
|
3
|
+
require 'pathname'
|
|
4
|
+
picolena_bin=Pathname(File.join(File.dirname(__FILE__),'../bin/picolena')).realpath.to_s
|
|
5
|
+
system("ruby #{picolena_bin} --spec-only")
|
|
6
|
+
end
|
data/tasks/test.rake
ADDED
data/website/index.html
CHANGED
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
<h1>Picolena</h1>
|
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
|
|
35
35
|
<p>Get Version</p>
|
|
36
|
-
<a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.
|
|
36
|
+
<a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.1</a>
|
|
37
37
|
</div>
|
|
38
38
|
<h1>→ ‘picolena’</h1>
|
|
39
39
|
|
|
@@ -114,7 +114,7 @@ ruby script/server</code></pre>
|
|
|
114
114
|
|
|
115
115
|
<p>Comments are welcome. Send an email to <a href="mailto:eric_duminil@rubyforge.org">Eric Duminil</a> email via the <a href="http://groups.google.com/group/picolena">forum</a></p>
|
|
116
116
|
<p class="coda">
|
|
117
|
-
<a href="eric_duminil@rubyforge.org">Eric DUMINIL</a>,
|
|
117
|
+
<a href="eric_duminil@rubyforge.org">Eric DUMINIL</a>, 12th April 2008<br>
|
|
118
118
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>,
|
|
119
119
|
by Daniel Cadenas via <a href="http://depgraph.rubyforge.org/">DepGraph</a>
|
|
120
120
|
</p>
|
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: picolena
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Eric Duminil
|
|
@@ -30,7 +30,7 @@ cert_chain:
|
|
|
30
30
|
qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
|
|
31
31
|
-----END CERTIFICATE-----
|
|
32
32
|
|
|
33
|
-
date: 2008-04-
|
|
33
|
+
date: 2008-04-12 00:00:00 +02:00
|
|
34
34
|
default_executable:
|
|
35
35
|
dependencies:
|
|
36
36
|
- !ruby/object:Gem::Dependency
|
|
@@ -87,6 +87,15 @@ dependencies:
|
|
|
87
87
|
- !ruby/object:Gem::Version
|
|
88
88
|
version: 1.1.0
|
|
89
89
|
version:
|
|
90
|
+
- !ruby/object:Gem::Dependency
|
|
91
|
+
name: rspec
|
|
92
|
+
version_requirement:
|
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
94
|
+
requirements:
|
|
95
|
+
- - ">="
|
|
96
|
+
- !ruby/object:Gem::Version
|
|
97
|
+
version: 1.1.3
|
|
98
|
+
version:
|
|
90
99
|
description: Picolena is a lightweight ferret-powered documents search engine written in Ruby on rails
|
|
91
100
|
email:
|
|
92
101
|
- eric_duminil@rubyforge.org
|
|
@@ -129,10 +138,13 @@ files:
|
|
|
129
138
|
- README.txt
|
|
130
139
|
- Rakefile
|
|
131
140
|
- bin/picolena
|
|
141
|
+
- config/files_to_clean
|
|
132
142
|
- config/hoe.rb
|
|
133
143
|
- config/requirements.rb
|
|
134
144
|
- lib/picolena/USAGE
|
|
145
|
+
- lib/picolena/config/basic.rb
|
|
135
146
|
- lib/picolena/config/indexed_directories.yml
|
|
147
|
+
- lib/picolena/config/title_and_names_and_links.yml
|
|
136
148
|
- lib/picolena/config/white_list_ip.yml
|
|
137
149
|
- lib/picolena/picolena_generator.rb
|
|
138
150
|
- lib/picolena/templates/MIT-LICENSE
|
|
@@ -144,19 +156,21 @@ files:
|
|
|
144
156
|
- lib/picolena/templates/app/models/document.rb
|
|
145
157
|
- lib/picolena/templates/app/models/finder.rb
|
|
146
158
|
- lib/picolena/templates/app/views/documents/_document.html.haml
|
|
159
|
+
- lib/picolena/templates/app/views/documents/cached.html.haml
|
|
160
|
+
- lib/picolena/templates/app/views/documents/content.html.haml
|
|
147
161
|
- lib/picolena/templates/app/views/documents/index.html.haml
|
|
148
162
|
- lib/picolena/templates/app/views/documents/show.html.haml
|
|
149
|
-
- lib/picolena/templates/app/views/documents/show_content.html.haml
|
|
150
163
|
- lib/picolena/templates/app/views/layouts/application.html.haml
|
|
151
164
|
- lib/picolena/templates/config/boot.rb
|
|
152
165
|
- lib/picolena/templates/config/environment.rb
|
|
153
166
|
- lib/picolena/templates/config/environments/development.rb
|
|
154
167
|
- lib/picolena/templates/config/environments/production.rb
|
|
155
168
|
- lib/picolena/templates/config/environments/test.rb
|
|
156
|
-
- lib/picolena/templates/config/initializers/
|
|
157
|
-
- lib/picolena/templates/config/initializers/
|
|
158
|
-
- lib/picolena/templates/config/initializers/
|
|
169
|
+
- lib/picolena/templates/config/initializers/001_load_custom_config.rb
|
|
170
|
+
- lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb
|
|
171
|
+
- lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
|
|
159
172
|
- lib/picolena/templates/config/initializers/004_load_filters.rb
|
|
173
|
+
- lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
|
|
160
174
|
- lib/picolena/templates/config/routes.rb
|
|
161
175
|
- lib/picolena/templates/lang/ui/de.yml
|
|
162
176
|
- lib/picolena/templates/lang/ui/en.yml
|
|
@@ -890,13 +904,13 @@ files:
|
|
|
890
904
|
- script/generate
|
|
891
905
|
- script/txt2html
|
|
892
906
|
- setup.rb
|
|
893
|
-
- spec/picolena_spec.rb
|
|
894
|
-
- tasks/default.rake
|
|
895
907
|
- tasks/deployment.rake
|
|
896
908
|
- tasks/environment.rake
|
|
909
|
+
- tasks/hack.rake
|
|
897
910
|
- tasks/rdoc.rake
|
|
898
911
|
- tasks/ri.rake
|
|
899
912
|
- tasks/rspec.rake
|
|
913
|
+
- tasks/test.rake
|
|
900
914
|
- tasks/website.rake
|
|
901
915
|
- website/index.html
|
|
902
916
|
- website/index.txt
|
|
@@ -931,5 +945,5 @@ rubygems_version: 1.1.0
|
|
|
931
945
|
signing_key:
|
|
932
946
|
specification_version: 2
|
|
933
947
|
summary: Picolena is a lightweight ferret-powered documents search engine written in Ruby on rails
|
|
934
|
-
test_files:
|
|
935
|
-
|
|
948
|
+
test_files: []
|
|
949
|
+
|
metadata.gz.sig
CHANGED
|
Binary file
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
#Deny all, Allow only IPs described in config/white_list_ip.yml
|
|
2
|
-
white_list_ip_config_file=['config/white_list_ip.yml', 'config/white_list_ip.yml.template'].find{|f| File.file?(f)}
|
|
3
|
-
WhiteListIPs=Regexp.new(
|
|
4
|
-
"^("<<
|
|
5
|
-
YAML.load_file(white_list_ip_config_file)["Allow"].collect{|ip|
|
|
6
|
-
ip.downcase.include?("all") ? /.*/ : Regexp.escape(ip)
|
|
7
|
-
}.join("|")<<")"
|
|
8
|
-
) rescue /^(127\.0\.0\.1|0\.0\.0\.0)/
|
data/spec/picolena_spec.rb
DELETED
data/tasks/default.rake
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
Rake::Task[:default].prerequisites.clear
|