picolena 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +11 -0
- data/Manifest.txt +11 -6
- data/config/files_to_clean +12 -0
- data/config/hoe.rb +7 -3
- data/lib/picolena/{templates/config/initializers/001_prepare_and_load_custom_config.rb → config/basic.rb} +14 -19
- data/lib/picolena/config/title_and_names_and_links.yml +26 -0
- data/lib/picolena/picolena_generator.rb +8 -3
- data/lib/picolena/templates/app/controllers/application.rb +10 -3
- data/lib/picolena/templates/app/controllers/documents_controller.rb +26 -7
- data/lib/picolena/templates/app/helpers/application_helper.rb +1 -4
- data/lib/picolena/templates/app/helpers/documents_helper.rb +29 -7
- data/lib/picolena/templates/app/models/document.rb +53 -19
- data/lib/picolena/templates/app/models/finder.rb +30 -14
- data/lib/picolena/templates/app/views/documents/_document.html.haml +5 -2
- data/lib/picolena/templates/app/views/documents/cached.html.haml +8 -0
- data/lib/picolena/templates/app/views/documents/{show_content.html.haml → content.html.haml} +1 -1
- data/lib/picolena/templates/app/views/documents/show.html.haml +1 -1
- data/lib/picolena/templates/config/boot.rb +4 -4
- data/lib/picolena/templates/config/initializers/001_load_custom_config.rb +3 -0
- data/lib/picolena/templates/config/initializers/{002_indexed_dirs.rb → 002_load_indexed_dirs.rb} +1 -1
- data/lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb +8 -0
- data/lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb +5 -0
- data/lib/picolena/templates/config/routes.rb +1 -1
- data/lib/picolena/templates/lang/ui/de.yml +2 -4
- data/lib/picolena/templates/lang/ui/en.yml +2 -4
- data/lib/picolena/templates/lang/ui/es.yml +3 -5
- data/lib/picolena/templates/lang/ui/fr.yml +3 -5
- data/lib/picolena/templates/lib/core_exts.rb +1 -1
- data/lib/picolena/templates/lib/ff.rb +4 -4
- data/lib/picolena/templates/spec/controllers/documents_controller_spec.rb +9 -8
- data/lib/picolena/templates/spec/models/basic_finder_spec.rb +3 -10
- data/lib/picolena/templates/spec/models/document_spec.rb +2 -2
- data/lib/picolena/templates/spec/models/finder_spec.rb +30 -9
- data/lib/picolena/templates/spec/models/host_indexing_system_spec.rb +12 -6
- data/lib/picolena/version.rb +1 -1
- data/lib/rubigen_ext.rb +3 -3
- data/tasks/hack.rake +17 -0
- data/tasks/rspec.rake +6 -20
- data/tasks/test.rake +3 -0
- data/website/index.html +2 -2
- data.tar.gz.sig +0 -0
- metadata +24 -10
- metadata.gz.sig +0 -0
- data/lib/picolena/templates/config/initializers/003_white_list_IPs.rb +0 -8
- data/spec/picolena_spec.rb +0 -3
- data/tasks/default.rake +0 -1
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.1.1 2008-04-12
|
2
|
+
|
3
|
+
* major enhancement:
|
4
|
+
* cache à la Google
|
5
|
+
|
6
|
+
* minor enhancements:
|
7
|
+
* easier configuration: all picolena-specific configuration files live in config/custom
|
8
|
+
* lots of refactoring
|
9
|
+
|
10
|
+
* bug fixes
|
11
|
+
|
1
12
|
== 0.1.0 2008-04-08
|
2
13
|
|
3
14
|
* minor enhancements:
|
data/Manifest.txt
CHANGED
@@ -4,10 +4,13 @@ Manifest.txt
|
|
4
4
|
README.txt
|
5
5
|
Rakefile
|
6
6
|
bin/picolena
|
7
|
+
config/files_to_clean
|
7
8
|
config/hoe.rb
|
8
9
|
config/requirements.rb
|
9
10
|
lib/picolena/USAGE
|
11
|
+
lib/picolena/config/basic.rb
|
10
12
|
lib/picolena/config/indexed_directories.yml
|
13
|
+
lib/picolena/config/title_and_names_and_links.yml
|
11
14
|
lib/picolena/config/white_list_ip.yml
|
12
15
|
lib/picolena/picolena_generator.rb
|
13
16
|
lib/picolena/templates/MIT-LICENSE
|
@@ -19,19 +22,21 @@ lib/picolena/templates/app/helpers/documents_helper.rb
|
|
19
22
|
lib/picolena/templates/app/models/document.rb
|
20
23
|
lib/picolena/templates/app/models/finder.rb
|
21
24
|
lib/picolena/templates/app/views/documents/_document.html.haml
|
25
|
+
lib/picolena/templates/app/views/documents/cached.html.haml
|
26
|
+
lib/picolena/templates/app/views/documents/content.html.haml
|
22
27
|
lib/picolena/templates/app/views/documents/index.html.haml
|
23
28
|
lib/picolena/templates/app/views/documents/show.html.haml
|
24
|
-
lib/picolena/templates/app/views/documents/show_content.html.haml
|
25
29
|
lib/picolena/templates/app/views/layouts/application.html.haml
|
26
30
|
lib/picolena/templates/config/boot.rb
|
27
31
|
lib/picolena/templates/config/environment.rb
|
28
32
|
lib/picolena/templates/config/environments/development.rb
|
29
33
|
lib/picolena/templates/config/environments/production.rb
|
30
34
|
lib/picolena/templates/config/environments/test.rb
|
31
|
-
lib/picolena/templates/config/initializers/
|
32
|
-
lib/picolena/templates/config/initializers/
|
33
|
-
lib/picolena/templates/config/initializers/
|
35
|
+
lib/picolena/templates/config/initializers/001_load_custom_config.rb
|
36
|
+
lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb
|
37
|
+
lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
|
34
38
|
lib/picolena/templates/config/initializers/004_load_filters.rb
|
39
|
+
lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
|
35
40
|
lib/picolena/templates/config/routes.rb
|
36
41
|
lib/picolena/templates/lang/ui/de.yml
|
37
42
|
lib/picolena/templates/lang/ui/en.yml
|
@@ -765,13 +770,13 @@ script/destroy
|
|
765
770
|
script/generate
|
766
771
|
script/txt2html
|
767
772
|
setup.rb
|
768
|
-
spec/picolena_spec.rb
|
769
|
-
tasks/default.rake
|
770
773
|
tasks/deployment.rake
|
771
774
|
tasks/environment.rake
|
775
|
+
tasks/hack.rake
|
772
776
|
tasks/rdoc.rake
|
773
777
|
tasks/ri.rake
|
774
778
|
tasks/rspec.rake
|
779
|
+
tasks/test.rake
|
775
780
|
tasks/website.rake
|
776
781
|
website/index.html
|
777
782
|
website/index.txt
|
@@ -0,0 +1,12 @@
|
|
1
|
+
lib/picolena/templates/LICENSE
|
2
|
+
lib/picolena/templates/README
|
3
|
+
lib/picolena/templates/config/custom/picolena.rb
|
4
|
+
lib/picolena/templates/config/custom/indexed_directories.yml
|
5
|
+
lib/picolena/templates/config/custom/white_list_ip.yml
|
6
|
+
lib/picolena/templates/config/custom/title_and_names_and_links.yml
|
7
|
+
lib/picolena/templates/log
|
8
|
+
lib/picolena/templates/spec/test_dirs/indexed/others/bäñüßé.txt
|
9
|
+
lib/picolena/templates/tmp
|
10
|
+
lib/picolena/templates/vendor
|
11
|
+
lib/picolena/templates/coverage
|
12
|
+
lib/picolena/templates/doc
|
data/config/hoe.rb
CHANGED
@@ -54,17 +54,21 @@ hoe = Hoe.new(GEM_NAME, VERS) do |p|
|
|
54
54
|
p.url = HOMEPATH
|
55
55
|
p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
|
56
56
|
p.test_globs = ["spec/picolena_spec.rb"]
|
57
|
-
|
57
|
+
|
58
|
+
files_to_clean = File.readlines(File.join(File.dirname(__FILE__),'files_to_clean')).collect{|line| line.chomp}
|
59
|
+
p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']+files_to_clean #An array of file patterns to delete on clean.
|
58
60
|
|
59
61
|
# == Optional
|
60
62
|
p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
|
61
63
|
p.extra_deps = [ ['rails', '>= 2.0.2'],
|
62
|
-
#Ferret 0.11.6 is not yet available for win32
|
64
|
+
# Ferret 0.11.6 is not yet available for win32
|
65
|
+
# FIXME: How to require 0.11.6 for *nix and 0.11.5 for win32?
|
63
66
|
['ferret', '>= 0.11.5'],
|
64
67
|
['haml', '>= 1.8.2'],
|
65
68
|
['rubyzip', '>= 0.9.1'],
|
66
69
|
['rubigen', '>= 1.2.4'],
|
67
|
-
['paginator', '>= 1.1.0']
|
70
|
+
['paginator', '>= 1.1.0'],
|
71
|
+
['rspec', '>= 1.1.3']
|
68
72
|
]
|
69
73
|
|
70
74
|
#p.spec_extras = {} # A hash of extra values to set in the gemspec.
|
@@ -1,16 +1,3 @@
|
|
1
|
-
### You should not modify this file if you'd like to customize your search engine.
|
2
|
-
### Please modify config/custom.rb instead.
|
3
|
-
### A template config/custom.rb will be created the first time you launch your web server.
|
4
|
-
|
5
|
-
|
6
|
-
custom_config_file = File.join(RAILS_ROOT, 'config/custom.rb')
|
7
|
-
|
8
|
-
File.open(custom_config_file,'w'){|custom|
|
9
|
-
custom.puts <<-DEFAULT_CONF
|
10
|
-
### This file has been automatically generated the first time you launched your web server.
|
11
|
-
### You should add custom requirements here, they will be loaded everytime you restart the web server.
|
12
|
-
|
13
|
-
|
14
1
|
# Specify indexes path.
|
15
2
|
# Storage should be sufficient in order to store all indexed data.
|
16
3
|
IndexesSavePath=File.join(RAILS_ROOT, 'tmp/ferret_indexes/')
|
@@ -27,14 +14,22 @@ Globalite.language = :en
|
|
27
14
|
# Specify which locale should be used by Ferret
|
28
15
|
Ferret.locale = "en_US.UTF-8"
|
29
16
|
|
17
|
+
|
30
18
|
# Results per page
|
31
19
|
ResultsPerPage = 10
|
32
20
|
|
21
|
+
|
22
|
+
# Length of "probably unique id" 's
|
23
|
+
# Those id's are used to characterize every document, thus allowing tiny URLs in Controllers
|
24
|
+
# HashLength = 10
|
25
|
+
# Document.new("whatever.pdf").probably_unique_id => "bbuxhynait"
|
26
|
+
# HashLength = 20
|
27
|
+
# Document.new("whatever.pdf").probably_unique_id => "jfzjkyfkfkbbuxhynait"
|
28
|
+
# The more documents you have, the bigger HashLength should be in order to avoid collisions.
|
29
|
+
# It would not be wise (and specs won't pass) to specify HashLength smaller than 10.
|
30
|
+
HashLength = 10
|
31
|
+
|
32
|
+
|
33
33
|
# Specify the default Levenshtein distance when using FuzzyQuery
|
34
34
|
# see http://ferret.davebalmain.com/api/classes/Ferret/QueryParser.html for more information.
|
35
|
-
Ferret::Search::FuzzyQuery.default_min_similarity=0.6
|
36
|
-
DEFAULT_CONF
|
37
|
-
} unless File.readable?(custom_config_file)
|
38
|
-
|
39
|
-
require 'ferret'
|
40
|
-
require custom_config_file
|
35
|
+
Ferret::Search::FuzzyQuery.default_min_similarity=0.6
|
@@ -0,0 +1,26 @@
|
|
1
|
+
## Link to a website, located in the bottom-left corner.
|
2
|
+
|
3
|
+
website_name: Picolena v<%= version %>
|
4
|
+
website_url: http://picolena.devjavu.com/
|
5
|
+
|
6
|
+
## Link to a backup search engine, if no document was found for a given query.
|
7
|
+
|
8
|
+
backup_search_engine_name: Google?
|
9
|
+
backup_search_engine_url: http://www.google.com/search?q=
|
10
|
+
#backup_search_engine_name: Yahoo!
|
11
|
+
#backup_search_engine_url: http://search.yahoo.com/search?p=
|
12
|
+
|
13
|
+
|
14
|
+
## Fields that are not blank will overwrite localization.
|
15
|
+
## Leave them as they are if you're happy with the current localization.
|
16
|
+
|
17
|
+
# page_title: Picolena Search Engine
|
18
|
+
page_title:
|
19
|
+
# help: Help
|
20
|
+
help:
|
21
|
+
# help_file: /help/PicolenaHowTo-en.pdf
|
22
|
+
help_file:
|
23
|
+
# type_your_query_here: Type your query here
|
24
|
+
type_your_query_here:
|
25
|
+
# search: Search!
|
26
|
+
search:
|
@@ -2,7 +2,7 @@ require 'tempfile'
|
|
2
2
|
require 'fileutils'
|
3
3
|
require 'pathname'
|
4
4
|
|
5
|
-
class PicolenaGenerator < RubiGen::Base
|
5
|
+
class PicolenaGenerator < RubiGen::Base #:nodoc:
|
6
6
|
|
7
7
|
DEFAULT_SHEBANG = File.join(Config::CONFIG['bindir'],
|
8
8
|
Config::CONFIG['ruby_install_name'])
|
@@ -58,12 +58,15 @@ class PicolenaGenerator < RubiGen::Base
|
|
58
58
|
end
|
59
59
|
|
60
60
|
# Picolena configuration files
|
61
|
-
m.file '../config/white_list_ip.yml', 'config/white_list_ip.yml'
|
62
|
-
m.
|
61
|
+
m.file '../config/white_list_ip.yml', 'config/custom/white_list_ip.yml'
|
62
|
+
m.file '../config/basic.rb', 'config/custom/picolena.rb'
|
63
|
+
m.template '../config/indexed_directories.yml', 'config/custom/indexed_directories.yml', :assigns => {:directories_to_index => @directories_to_index}
|
64
|
+
m.template '../config/title_and_names_and_links.yml', 'config/custom/title_and_names_and_links.yml', :assigns => {:version => Picolena::VERSION::STRING}
|
63
65
|
|
64
66
|
# README, License & Rakefile
|
65
67
|
m.file 'MIT-LICENSE', 'LICENSE'
|
66
68
|
m.file '../../../README.txt', 'README'
|
69
|
+
m.file '../../../README.txt', 'doc/README_FOR_APP'
|
67
70
|
m.file 'Rakefile', 'Rakefile'
|
68
71
|
|
69
72
|
unless options[:no_index]
|
@@ -128,6 +131,8 @@ EOS
|
|
128
131
|
config
|
129
132
|
config/environments
|
130
133
|
config/initializers
|
134
|
+
config/custom
|
135
|
+
doc
|
131
136
|
lang/ui
|
132
137
|
lib
|
133
138
|
lib/filters
|
@@ -1,13 +1,18 @@
|
|
1
|
+
# ApplicationController just checks every incoming request according to the remote IP address.
|
2
|
+
#
|
3
|
+
# The request is sent to DocumentsController only if the IP is included in the white list.
|
4
|
+
# Otherwise, it returns "Access denied" 403.
|
5
|
+
|
1
6
|
class ApplicationController < ActionController::Base
|
2
7
|
session :disabled => true
|
3
8
|
before_filter :should_only_be_available_for_white_list_IPs, :except=> :access_denied
|
4
9
|
|
5
|
-
#
|
10
|
+
# Returns 403 status in case of an unknown remote IP address
|
6
11
|
def access_denied
|
7
|
-
render :text=>
|
12
|
+
render :text=>request.inspect, :status => 403
|
8
13
|
end
|
9
14
|
|
10
|
-
#
|
15
|
+
# Redirects to documents_url in case route hasn't been recognised
|
11
16
|
def unknown_request
|
12
17
|
flash[:warning]="Unknown URL"
|
13
18
|
redirect_to documents_url
|
@@ -15,6 +20,8 @@ class ApplicationController < ActionController::Base
|
|
15
20
|
|
16
21
|
private
|
17
22
|
|
23
|
+
# Tries to match remote IP address with the white list defined in config/custom/white_list_ip.yml
|
24
|
+
# Redirects to :access_denied if the remote IP is not white listed.
|
18
25
|
def should_only_be_available_for_white_list_IPs
|
19
26
|
unless request.remote_ip =~ WhiteListIPs
|
20
27
|
redirect_to :controller => 'application', :action=>'access_denied'
|
@@ -1,5 +1,13 @@
|
|
1
|
+
# Core controller of Picolena search-engine.
|
2
|
+
# DocumentsController
|
3
|
+
# - treats queries
|
4
|
+
# - launches searches
|
5
|
+
# - returns matching documents
|
6
|
+
# - displays document content
|
7
|
+
# - displays cached content.
|
8
|
+
|
1
9
|
class DocumentsController < ApplicationController
|
2
|
-
before_filter :check_if_valid_link, :only=> [:download, :
|
10
|
+
before_filter :check_if_valid_link, :only=> [:download, :content, :cached]
|
3
11
|
|
4
12
|
# Actually doesn't check anything at all. Just a redirect to show_document(query)
|
5
13
|
#
|
@@ -16,7 +24,7 @@ class DocumentsController < ApplicationController
|
|
16
24
|
page=params[:page]||1
|
17
25
|
finder=Finder.new(@query,page)
|
18
26
|
finder.execute!
|
19
|
-
pager=::Paginator.new(finder.total_hits, ResultsPerPage) do
|
27
|
+
pager=::Paginator.new(finder.total_hits, ResultsPerPage) do
|
20
28
|
finder.matching_documents
|
21
29
|
end
|
22
30
|
@matching_documents=pager.page(page)
|
@@ -25,22 +33,33 @@ class DocumentsController < ApplicationController
|
|
25
33
|
end
|
26
34
|
|
27
35
|
|
28
|
-
# Download the file whose
|
36
|
+
# Download the file whose probably_unique_id is given.
|
29
37
|
# If the checksum is incorrect, redirect to documents_url via no_valid_link
|
30
38
|
def download
|
31
|
-
send_file @document.complete_path
|
39
|
+
send_file @document.complete_path
|
32
40
|
end
|
33
41
|
|
34
|
-
|
42
|
+
# Returns the content of the document identified by probably_unique_id, as it is *now*.
|
43
|
+
def content
|
44
|
+
end
|
45
|
+
|
46
|
+
# Returns the content of the document identified by probably_unique_id, as it was at the time it was indexed.
|
47
|
+
# similar to Google cache.
|
48
|
+
def cached
|
35
49
|
end
|
36
50
|
|
37
51
|
private
|
38
52
|
|
53
|
+
# Returns corresponding document for any given "probably unique id"
|
54
|
+
# Redirects to no_valid_link if:
|
55
|
+
# there is more than one matching document (hash collision)
|
56
|
+
# there is no matching document (wrong hash)
|
39
57
|
def check_if_valid_link
|
40
|
-
@
|
41
|
-
@document=
|
58
|
+
@probably_unique_id=params[:id]
|
59
|
+
@document=Document.find_by_unique_id(@probably_unique_id) rescue no_valid_link
|
42
60
|
end
|
43
61
|
|
62
|
+
# Flashes a warning and redirects to documents_url.
|
44
63
|
def no_valid_link
|
45
64
|
flash[:warning]="no valid link"
|
46
65
|
redirect_to documents_url
|
@@ -1,10 +1,19 @@
|
|
1
1
|
module DocumentsHelper
|
2
|
+
# Returns true if no document has been found for a given query.
|
3
|
+
def nothing_found?
|
4
|
+
@matching_documents.nil? or @matching_documents.entries.empty?
|
5
|
+
end
|
6
|
+
|
7
|
+
# Very basic pagination.
|
8
|
+
# Provides links to Next, Prev and FirstPage when needed.
|
2
9
|
def should_paginate(page,query)
|
3
10
|
[(link_to("←←", :action => :show, :id => query, :page => 1) if page.number>2),
|
4
11
|
(link_to("←", :action => :show, :id => query, :page => page.prev.number) if page.prev?),
|
5
12
|
(link_to("→", :action => :show, :id => query, :page => page.next.number) if page.next?)].compact.join(" | ")
|
6
13
|
end
|
7
14
|
|
15
|
+
# Returns a localized sentence like "Results 1-10 of 12 for Zimbabwe (0.472s)" or
|
16
|
+
# "Résultats 1-2 parmi 2 pour whatever (0.012s)"
|
8
17
|
def describe_results(page, total_hits, dt, query)
|
9
18
|
[:results.l,
|
10
19
|
content_tag(:strong,"#{page.first_item_number}-#{page.last_item_number}"),
|
@@ -16,20 +25,25 @@ module DocumentsHelper
|
|
16
25
|
].join(' ')
|
17
26
|
end
|
18
27
|
|
28
|
+
# Returns the time needed to process the query and run the search, with millisecond precision, e.g. (0.472s)
|
19
29
|
def show_time_needed(dt)
|
20
30
|
content_tag(:small,'('<<number_with_precision(dt,3)<<'s)')
|
21
31
|
end
|
22
|
-
|
32
|
+
|
33
|
+
# When possible, highlights content of the document that match the query.
|
23
34
|
def highlight_matching_content(document)
|
24
35
|
content_tag(:ul,document.matching_content.collect{|sentence|
|
25
36
|
content_tag(:li,h(sentence).gsub(/<<(.*?)>>/,'<strong>\1</strong>').gsub(/\v|\f/,''))
|
26
37
|
}) if document.matching_content
|
27
38
|
end
|
28
39
|
|
29
|
-
|
30
|
-
|
40
|
+
# Returns icon and filename for any given document.
|
41
|
+
def icon_and_filename_for(document)
|
42
|
+
[icon_for(document.extname),document.filename].join(" ")
|
31
43
|
end
|
32
44
|
|
45
|
+
# Returns the location (if available) of the filetype icon.
|
46
|
+
# TODO: Move this hash to a .yml config file.
|
33
47
|
def icon_for(filetype)
|
34
48
|
pic_for_exts={
|
35
49
|
:xls=>%w{xls xlsx ods},
|
@@ -53,18 +67,26 @@ module DocumentsHelper
|
|
53
67
|
image_tag("icons/#{pic.first}.png") if pic
|
54
68
|
end
|
55
69
|
|
56
|
-
|
57
|
-
|
70
|
+
# Returns a link to a backup search engine that could maybe find more results for the same query.
|
71
|
+
def link_to_backup_search_engine(query)
|
72
|
+
link_to :backup_search_engine_name.l, :backup_search_engine_url.l<<query
|
58
73
|
end
|
59
74
|
|
75
|
+
# For any indexed document, returns a link to its containing directory.
|
60
76
|
def link_to_containing_directory(document)
|
61
77
|
link_name=image_tag('icons/remote_folder.png')<<' '<<content_tag(:small,document.alias_path)
|
62
78
|
link_to link_name, document.alias_path, :target=>'_blank'
|
63
79
|
end
|
64
80
|
|
81
|
+
# For any indexed document, returns a link to show its content.
|
65
82
|
def link_to_plain_text_content(document)
|
66
|
-
return unless document.supported?
|
67
83
|
link_name=image_tag('icons/plain_text_small.png')<<' '<<content_tag(:small,:text_content.l)
|
68
|
-
link_to link_name,
|
84
|
+
link_to link_name, content_document_path(document.probably_unique_id)
|
85
|
+
end
|
86
|
+
|
87
|
+
# For any indexed document, returns a link to show its cached content.
|
88
|
+
def link_to_cached_content(document)
|
89
|
+
link_name="("<<content_tag(:small,:cached.l)<<")"
|
90
|
+
link_to link_name, cached_document_path(document.probably_unique_id)
|
69
91
|
end
|
70
92
|
end
|
@@ -1,64 +1,98 @@
|
|
1
|
+
# Document class retrieves information from filesystem and the index for any given document.
|
1
2
|
class Document
|
2
3
|
attr_reader :complete_path
|
3
|
-
attr_accessor :user, :score, :matching_content
|
4
|
+
attr_accessor :user, :score, :matching_content, :index_id
|
4
5
|
|
5
|
-
def initialize(
|
6
|
-
@complete_path
|
6
|
+
def initialize(path)
|
7
|
+
# Ensures @complete_path is an absolute path.
|
8
|
+
@complete_path=File.expand_path(path)
|
7
9
|
validate_existence_of_file
|
8
10
|
validate_in_indexed_directory
|
9
11
|
end
|
10
12
|
|
11
|
-
|
12
|
-
id
|
13
|
-
end
|
14
|
-
|
15
|
-
def to_s
|
16
|
-
filename
|
17
|
-
end
|
13
|
+
alias_method :to_param, :id
|
18
14
|
|
19
15
|
#Delegating properties to File::method_name(complete_path)
|
20
16
|
[:dirname, :basename, :extname, :size?, :file?, :read, :ext_as_sym].each{|method_name|
|
21
17
|
define_method(method_name){File.send(method_name,complete_path)}
|
22
18
|
}
|
23
19
|
alias_method :size, :size?
|
24
|
-
alias_method :content, :read
|
25
20
|
alias_method :filename, :basename
|
21
|
+
alias_method :to_s, :basename
|
26
22
|
|
23
|
+
# Returns filename without extension
|
24
|
+
# "buildings.odt" => "buildings"
|
27
25
|
def basename
|
28
26
|
filename.chomp(extname)
|
29
27
|
end
|
30
|
-
|
31
|
-
def absolute_dirname
|
32
|
-
Pathname.new(dirname).realpath.to_s
|
33
|
-
end
|
34
28
|
|
29
|
+
# End users should not always know where documents are stored internally.
|
30
|
+
# An alias path can be specified in config/indexed_directories.yml
|
31
|
+
#
|
32
|
+
# For example, with:
|
33
|
+
# "/media/wiki_dump/" : "http://www.mycompany.com/wiki/"
|
34
|
+
#
|
35
|
+
# The documents
|
36
|
+
# "/media/wiki_dump/organigram.odp"
|
37
|
+
# will be displayed as being:
|
38
|
+
# "http://www.mycompany.com/wiki/organigram.odp"
|
35
39
|
def alias_path
|
36
40
|
original_dir=indexed_directory
|
37
41
|
alias_dir=IndexedDirectories[original_dir]
|
38
|
-
|
42
|
+
dirname.sub(original_dir,alias_dir)
|
39
43
|
end
|
40
44
|
|
41
|
-
|
42
|
-
|
45
|
+
# Returns an id for this document.
|
46
|
+
# This id will be used in Controllers in order to get tiny urls.
|
47
|
+
# Since it's a base26 hash of the absolute filename, it can only be "probably unique".
|
48
|
+
# For huge amount of indexed documents, it would be wise to increase HashLength in config/custom/picolena.rb
|
49
|
+
def probably_unique_id
|
50
|
+
@probably_unique_id||=complete_path.base26_hash
|
43
51
|
end
|
44
52
|
|
53
|
+
# Returns true iff some Filter has been defined to convert it to plain text.
|
54
|
+
# Document.new("presentation.pdf").supported? => true
|
55
|
+
# Document.new("presentation.some_weird_extension").supported? => false
|
45
56
|
def supported?
|
46
57
|
PlainText.supported_extensions.include?(self.ext_as_sym)
|
47
58
|
end
|
48
59
|
|
60
|
+
# Retrieves content as it is *now*.
|
49
61
|
def content
|
50
62
|
PlainText.extract_content_from(complete_path)
|
51
63
|
end
|
52
64
|
|
65
|
+
# Cache à la Google.
|
66
|
+
# Returns content as it was at the time it was indexed.
|
67
|
+
def cached
|
68
|
+
get_index_id! unless index_id
|
69
|
+
Finder.index[index_id][:content]
|
70
|
+
end
|
71
|
+
|
72
|
+
# Returns the last modification date before the document got indexed.
|
73
|
+
# Useful to know how old a document is, and to which version the cache corresponds.
|
74
|
+
def date
|
75
|
+
get_index_id! unless index_id
|
76
|
+
Finder.index[index_id][:date].sub(/(\d{4})(\d{2})(\d{2})/,'\1-\2-\3')
|
77
|
+
end
|
78
|
+
|
53
79
|
private
|
54
80
|
|
81
|
+
def get_index_id!
|
82
|
+
@index_id = Document.find_by_unique_id(probably_unique_id).index_id
|
83
|
+
end
|
84
|
+
|
85
|
+
def self.find_by_unique_id(some_id)
|
86
|
+
Finder.new("probably_unique_id:"<<some_id).matching_document
|
87
|
+
end
|
88
|
+
|
55
89
|
def in_indexed_directory?
|
56
90
|
!indexed_directory.nil?
|
57
91
|
end
|
58
92
|
|
59
93
|
def indexed_directory
|
60
94
|
IndexedDirectories.keys.find{|indexed_dir|
|
61
|
-
|
95
|
+
dirname.starts_with?(indexed_dir)
|
62
96
|
}
|
63
97
|
end
|
64
98
|
|
@@ -1,14 +1,21 @@
|
|
1
1
|
require 'ff'
|
2
2
|
|
3
3
|
class Finder
|
4
|
-
|
4
|
+
#FIXME: Should not use all those class methods to access index.
|
5
|
+
|
6
|
+
attr_reader :query
|
7
|
+
|
8
|
+
def self.index
|
9
|
+
# caching index @@index ||=
|
10
|
+
# causes ferret-0.11.6/lib/ferret/index.rb:768: [BUG] Segmentation fault
|
11
|
+
Ferret::Index::Index.new(:path => IndexSavePath, :analyzer=>Analyzer)
|
12
|
+
end
|
5
13
|
|
6
14
|
def initialize(raw_query,page=1,results_per_page=ResultsPerPage)
|
7
15
|
query_parser = Ferret::QueryParser.new(:fields => [:content, :file, :basename, :filetype, :date], :or_default => false, :analyzer=>Analyzer)
|
8
16
|
@query = query_parser.parse(convert_to_english(raw_query))
|
9
17
|
@raw_query= raw_query
|
10
18
|
Finder.ensure_that_index_exists_on_disk
|
11
|
-
@index = Ferret::Index::Index.new(:path => IndexSavePath, :analyzer=>Analyzer)
|
12
19
|
@per_page=results_per_page
|
13
20
|
@offset=(page.to_i-1)*results_per_page
|
14
21
|
validate_that_index_has_documents
|
@@ -18,16 +25,17 @@ class Finder
|
|
18
25
|
@matching_documents=[]
|
19
26
|
start=Time.now
|
20
27
|
begin
|
21
|
-
top_docs=index.search(query, :limit => @per_page, :offset=>@offset)
|
28
|
+
top_docs=Finder.index.search(query, :limit => @per_page, :offset=>@offset)
|
22
29
|
top_docs.hits.each{|hit|
|
23
|
-
|
30
|
+
index_id,score=hit.doc,hit.score
|
24
31
|
begin
|
25
|
-
found_doc=Document.new(index[
|
26
|
-
found_doc.matching_content=index.highlight(query,
|
32
|
+
found_doc=Document.new(Finder.index[index_id][:complete_path])
|
33
|
+
found_doc.matching_content=Finder.index.highlight(query, index_id,
|
27
34
|
:field => :content, :excerpt_length => 80,
|
28
35
|
:pre_tag => "<<", :post_tag => ">>"
|
29
36
|
) unless @raw_query=~/^\*+\.\w*$/
|
30
37
|
found_doc.score=score
|
38
|
+
found_doc.index_id=index_id
|
31
39
|
@matching_documents<<found_doc
|
32
40
|
rescue Errno::ENOENT
|
33
41
|
#"File has been moved/deleted!"
|
@@ -37,14 +45,20 @@ class Finder
|
|
37
45
|
@time_needed=Time.now-start
|
38
46
|
@total_hits=top_docs.total_hits
|
39
47
|
ensure
|
40
|
-
index.close
|
48
|
+
#index.close
|
41
49
|
end
|
42
50
|
end
|
43
51
|
|
52
|
+
# Returns true if it has been executed.
|
44
53
|
def executed?
|
45
54
|
@executed
|
46
55
|
end
|
47
56
|
|
57
|
+
# To ensure that
|
58
|
+
# matching_documents
|
59
|
+
# total_hits
|
60
|
+
# time_needed
|
61
|
+
# methods are called only after the index has been searched.
|
48
62
|
[:matching_documents, :total_hits, :time_needed].each{|attribute_name|
|
49
63
|
define_method(attribute_name){
|
50
64
|
execute! unless executed?
|
@@ -52,18 +66,19 @@ class Finder
|
|
52
66
|
}
|
53
67
|
}
|
54
68
|
|
69
|
+
# Returns true if the index exists.
|
55
70
|
def self.has_index?
|
56
71
|
index_filename and File.exists?(index_filename)
|
57
72
|
end
|
58
73
|
|
74
|
+
# Returns true if there's at least one document indexed.
|
59
75
|
def has_documents?
|
60
|
-
index.size>0
|
61
|
-
end
|
62
|
-
|
63
|
-
def self.up_to_date?
|
64
|
-
IndexedDirectories.keys.all?{|dir| File.mtime(index_filename) > File.mtime(dir)}
|
76
|
+
Finder.index.size>0
|
65
77
|
end
|
66
78
|
|
79
|
+
# Returns the matching document for any given query, but only if
|
80
|
+
# exactly one document is found.
|
81
|
+
# Raises otherwise.
|
67
82
|
def matching_document
|
68
83
|
case matching_documents.size
|
69
84
|
when 0
|
@@ -77,7 +92,8 @@ class Finder
|
|
77
92
|
|
78
93
|
private
|
79
94
|
|
80
|
-
|
95
|
+
# Convert query keywords to english so they can be parsed by Ferret.
|
96
|
+
def convert_to_english(query)
|
81
97
|
to_en={
|
82
98
|
/\b#{:AND.l}\b/=>'AND',
|
83
99
|
/\b#{:OR.l}\b/=>'OR',
|
@@ -87,7 +103,7 @@ class Finder
|
|
87
103
|
/#{:date.l}:/ => 'date:',
|
88
104
|
/\b#{:LIKE.l}\s+(\S+)/=>'\1~'
|
89
105
|
}
|
90
|
-
to_en.inject(
|
106
|
+
to_en.inject(query){|mem,non_english_to_english_keyword|
|
91
107
|
mem.gsub(*non_english_to_english_keyword)
|
92
108
|
}
|
93
109
|
end
|
@@ -1,7 +1,10 @@
|
|
1
1
|
%h2
|
2
|
-
=link_to icon_and_filename_for(document), download_document_path(document.
|
2
|
+
=link_to icon_and_filename_for(document), download_document_path(document.probably_unique_id)
|
3
3
|
%small=number_to_percentage(document.score*100, :precision=>1)
|
4
4
|
=highlight_matching_content(document)
|
5
5
|
%p=link_to_containing_directory(document)
|
6
|
-
|
6
|
+
- if document.supported?
|
7
|
+
%p
|
8
|
+
=link_to_plain_text_content(document)
|
9
|
+
=link_to_cached_content(document)
|
7
10
|
%hr/
|
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
RAILS_ROOT = "#{File.dirname(__FILE__)}/.." unless defined?(RAILS_ROOT)
|
5
5
|
|
6
|
-
module Rails
|
6
|
+
module Rails #:nodoc:
|
7
7
|
class << self
|
8
8
|
def boot!
|
9
9
|
unless booted?
|
@@ -34,20 +34,20 @@ module Rails
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
class Boot
|
37
|
+
class Boot #:nodoc:
|
38
38
|
def run
|
39
39
|
load_initializer
|
40
40
|
Rails::Initializer.run(:set_load_path)
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
-
class VendorBoot < Boot
|
44
|
+
class VendorBoot < Boot #:nodoc:
|
45
45
|
def load_initializer
|
46
46
|
require "#{RAILS_ROOT}/vendor/rails/railties/lib/initializer"
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
-
class GemBoot < Boot
|
50
|
+
class GemBoot < Boot #:nodoc:
|
51
51
|
def load_initializer
|
52
52
|
self.class.load_rubygems
|
53
53
|
load_rails_gem
|
data/lib/picolena/templates/config/initializers/{002_indexed_dirs.rb → 002_load_indexed_dirs.rb}
RENAMED
@@ -1,5 +1,5 @@
|
|
1
1
|
#Loading directories to be indexed
|
2
|
-
indexed_dir_config_file=
|
2
|
+
indexed_dir_config_file='config/custom/indexed_directories.yml'
|
3
3
|
IndexedDirectories={}
|
4
4
|
YAML.load_file(indexed_dir_config_file)[RAILS_ENV].each_pair{|abs_or_rel_path, alias_path|
|
5
5
|
IndexedDirectories[Pathname(abs_or_rel_path).realpath.to_s]=alias_path
|
@@ -0,0 +1,8 @@
|
|
1
|
+
#Deny all, Allow only IPs described in config/custom/white_list_ip.yml
|
2
|
+
white_list_ip_config_file='config/custom/white_list_ip.yml'
|
3
|
+
WhiteListIPs=Regexp.new(
|
4
|
+
"^("<<
|
5
|
+
YAML.load_file(white_list_ip_config_file)["Allow"].collect{|ip|
|
6
|
+
ip.downcase.include?("all") ? /.*/ : Regexp.escape(ip)
|
7
|
+
}.join("|")<<")"
|
8
|
+
) rescue /^(127\.0\.0\.1|0\.0\.0\.0)/
|
@@ -1,5 +1,5 @@
|
|
1
1
|
ActionController::Routing::Routes.draw do |map|
|
2
|
-
map.resources :documents, :collection=>{:check_query=>:post}, :member=>{:download=>:get, :
|
2
|
+
map.resources :documents, :collection=>{:check_query=>:post}, :member=>{:download=>:get, :content=>:get, :cached=>:get}
|
3
3
|
map.connect 'documents/:id', :controller=>'documents', :action=>'show', :id => /.*/
|
4
4
|
map.connect 'access_denied', :controller=> 'application', :action => 'access_denied'
|
5
5
|
map.connect "*anything", :controller=>'application', :action => 'unknown_request'
|
@@ -9,10 +9,8 @@ results: Ergebnisse
|
|
9
9
|
of: von
|
10
10
|
for: für
|
11
11
|
text_content: Text-Inhalt
|
12
|
-
|
13
|
-
|
14
|
-
website_name: Picolena
|
15
|
-
website_url: http://picolena.devjavu.com/
|
12
|
+
cached: cached
|
13
|
+
as_it_was_indexed_on: wie am
|
16
14
|
|
17
15
|
## Boolean operators
|
18
16
|
AND: UND
|
@@ -9,10 +9,8 @@ results: Results
|
|
9
9
|
of: of
|
10
10
|
for: for
|
11
11
|
text_content: Text content
|
12
|
-
|
13
|
-
|
14
|
-
website_name: Picolena
|
15
|
-
website_url: http://picolena.devjavu.com/
|
12
|
+
cached: cached
|
13
|
+
as_it_was_indexed_on: as it was indexed on
|
16
14
|
|
17
15
|
## Boolean operators
|
18
16
|
AND: AND
|
@@ -8,11 +8,9 @@ nothing_found_for: Ningún documento encontrado por
|
|
8
8
|
results: Resultados
|
9
9
|
of: de
|
10
10
|
for: por
|
11
|
-
text_content:
|
12
|
-
|
13
|
-
|
14
|
-
website_name: Picolena
|
15
|
-
website_url: http://picolena.devjavu.com/
|
11
|
+
text_content: ver contenido
|
12
|
+
cached: caché
|
13
|
+
as_it_was_indexed_on: como fue indexado el
|
16
14
|
|
17
15
|
## Boolean operators
|
18
16
|
AND: Y
|
@@ -8,11 +8,9 @@ nothing_found_for: Rien trouvé pour
|
|
8
8
|
results: Résultats
|
9
9
|
of: parmi
|
10
10
|
for: pour
|
11
|
-
text_content: contenu
|
12
|
-
|
13
|
-
|
14
|
-
website_name: Picolena
|
15
|
-
website_url: http://picolena.devjavu.com/
|
11
|
+
text_content: voir le contenu
|
12
|
+
cached: cached
|
13
|
+
as_it_was_indexed_on: comme indexé le
|
16
14
|
|
17
15
|
## Boolean operators
|
18
16
|
AND: ET
|
@@ -17,7 +17,7 @@ end
|
|
17
17
|
|
18
18
|
class String
|
19
19
|
# Creates a "probably unique" id with the desired length, composed only of lowercase letters.
|
20
|
-
def base26_hash(length=10)
|
20
|
+
def base26_hash(length=HashLength)
|
21
21
|
Digest::MD5.hexdigest(self).to_i(16).to_s(26).tr('0-9a-p', 'a-z')[-length,length]
|
22
22
|
end
|
23
23
|
end
|
@@ -20,11 +20,11 @@ def index_file(index, filename, mime_type=nil)
|
|
20
20
|
complete_path=File.expand_path(filename)
|
21
21
|
fields = {
|
22
22
|
:complete_path=> complete_path,
|
23
|
-
:
|
23
|
+
:probably_unique_id => complete_path.base26_hash,
|
24
24
|
:file => File.basename(filename),
|
25
25
|
:basename => File.basename(filename, File.extname(filename)).gsub(/_/,' '),
|
26
26
|
:filetype => File.extname(filename),
|
27
|
-
:date => File.mtime(filename).strftime("%Y%m%d")
|
27
|
+
:date => File.mtime(filename).strftime("%Y%m%d%H%M")
|
28
28
|
}
|
29
29
|
|
30
30
|
if mime_type then
|
@@ -112,6 +112,6 @@ def add_fields(index)
|
|
112
112
|
index.field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5)
|
113
113
|
index.field_infos.add_field(:file, :store => :no, :index => :yes, :boost => 1.5)
|
114
114
|
index.field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5)
|
115
|
-
index.field_infos.add_field(:date, :store=>:
|
116
|
-
index.field_infos.add_field(:
|
115
|
+
index.field_infos.add_field(:date, :store=>:yes, :index=>:yes)
|
116
|
+
index.field_infos.add_field(:probably_unique_id, :store=>:no, :index=>:yes)
|
117
117
|
end
|
@@ -8,17 +8,18 @@ describe "DocumentsController called from unknown IP" do
|
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should deny access" do
|
11
|
-
|
11
|
+
# Displays a warning otherwise
|
12
|
+
Object.send(:remove_const, :WhiteListIPs) && WhiteListIPs=/Something that won't match/
|
12
13
|
get 'index'
|
13
14
|
response.should be_redirect
|
14
15
|
response.should redirect_to(:controller=>'application', :action=>'access_denied')
|
15
|
-
WhiteListIPs=/^0\.0\.0\.0/
|
16
|
+
Object.send(:remove_const, :WhiteListIPs) && WhiteListIPs=/^0\.0\.0\.0/
|
16
17
|
get 'index'
|
17
18
|
response.should be_success
|
18
19
|
end
|
19
20
|
|
20
21
|
after(:all) do
|
21
|
-
WhiteListIPs=@backup
|
22
|
+
Object.send(:remove_const, :WhiteListIPs) && WhiteListIPs=@backup
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
@@ -96,18 +97,18 @@ describe DocumentsController do
|
|
96
97
|
response.should be_success
|
97
98
|
assigns[:matching_documents].entries.should_not be_empty
|
98
99
|
d=assigns[:matching_documents].entries.first
|
99
|
-
get 'download', :id=>d.
|
100
|
+
get 'download', :id=>d.probably_unique_id
|
100
101
|
assigns[:document].complete_path == d.complete_path
|
101
102
|
response.should be_success
|
102
103
|
end
|
103
104
|
|
104
105
|
it "GET 'download' should redirect if wrong id" do
|
105
|
-
|
106
|
-
get 'download', :id=>
|
106
|
+
probably_unique_id="Not a document".base26_hash
|
107
|
+
get 'download', :id=>probably_unique_id
|
107
108
|
response.should be_redirect
|
108
109
|
response.should redirect_to(documents_url)
|
109
|
-
|
110
|
-
get 'download', :id=>
|
110
|
+
probably_unique_id='Whatever'
|
111
|
+
get 'download', :id=>probably_unique_id
|
111
112
|
response.should be_redirect
|
112
113
|
response.should redirect_to(documents_url)
|
113
114
|
end
|
@@ -17,7 +17,7 @@ describe "Finder without index on disk" do
|
|
17
17
|
IndexedDirectories.replace({'spec/test_dirs/indexed/just_one_doc'=>'//justonedoc/'})
|
18
18
|
lambda {@finder_with_new_index=Finder.new("test moi")}.should change(Finder, :has_index?).from(false).to(true)
|
19
19
|
File.exists?(File.join(@new_index_path,'_0.cfs')).should be_true
|
20
|
-
|
20
|
+
Finder.index.size.should >0
|
21
21
|
end
|
22
22
|
|
23
23
|
it "should raise if index is still empty after trying to create it" do
|
@@ -75,21 +75,14 @@ describe "Basic Finder" do
|
|
75
75
|
@finder=Finder.new("some query")
|
76
76
|
lambda {@finder.execute!}.should change(@finder, :executed?).from(false).to(true)
|
77
77
|
end
|
78
|
-
|
79
|
-
it "should know if index is up to date" do
|
80
|
-
Finder.has_index?.should be_true
|
81
|
-
Finder.should respond_to(:up_to_date?)
|
82
|
-
Finder.should be_up_to_date
|
83
|
-
end
|
84
|
-
|
78
|
+
|
85
79
|
it "should not warn anything if index is up to date"
|
86
80
|
|
87
81
|
it "should warn if index is not up to date"
|
88
82
|
|
89
83
|
fields.each_pair do |description,field_name|
|
90
84
|
it "should index #{description} as :#{field_name}" do
|
91
|
-
|
92
|
-
@finder.index.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
|
85
|
+
Finder.index.field_infos[field_name].should be_an_instance_of(Ferret::Index::FieldInfo)
|
93
86
|
end
|
94
87
|
end
|
95
88
|
|
@@ -2,9 +2,9 @@ require File.dirname(__FILE__) + '/../spec_helper'
|
|
2
2
|
|
3
3
|
basic_pdf_attribute={
|
4
4
|
:size=>9380,
|
5
|
-
:dirname=>'spec/test_dirs/indexed/basic',
|
5
|
+
:dirname=>File.join(RAILS_ROOT, 'spec/test_dirs/indexed/basic'),
|
6
6
|
:basename=>'basic',
|
7
|
-
:complete_path=>'spec/test_dirs/indexed/basic/basic.pdf',
|
7
|
+
:complete_path=>File.join(RAILS_ROOT, '/spec/test_dirs/indexed/basic/basic.pdf'),
|
8
8
|
:extname=>'.pdf',
|
9
9
|
:filename=>'basic.pdf'
|
10
10
|
}
|
@@ -1,13 +1,19 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/../spec_helper'
|
2
2
|
|
3
|
+
def revert_changes!(file,content)
|
4
|
+
File.open(file,'w'){|might_have_been_modified|
|
5
|
+
might_have_been_modified.write content
|
6
|
+
}
|
7
|
+
end
|
8
|
+
|
3
9
|
describe Finder do
|
4
10
|
before(:all) do
|
5
11
|
# SVN doesn't like non-ascii filenames.
|
6
|
-
|
7
|
-
weird_file.write "just to know if files are indexed with utf8 filenames"
|
8
|
-
}
|
9
|
-
|
12
|
+
revert_changes!('spec/test_dirs/indexed/others/bäñüßé.txt',"just to know if files are indexed with utf8 filenames")
|
10
13
|
|
14
|
+
# To be sure this file has the right content
|
15
|
+
revert_changes!("spec/test_dirs/indexed/others/placeholder.txt","Absorption and Adsorption cooling machines!!!")
|
16
|
+
|
11
17
|
once_upon_a_time=Time.local(1982,2,16,20,42)
|
12
18
|
a_bit_later=Time.local(1983,12,9,9)
|
13
19
|
nineties=Time.local(1990)
|
@@ -41,10 +47,9 @@ describe Finder do
|
|
41
47
|
end
|
42
48
|
|
43
49
|
it "should give a boost to basename, filename and filetype in index" do
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
@finder.index.field_infos[:filetype].boost.should > 1.0
|
50
|
+
Finder.index.field_infos[:basename].boost.should > 1.0
|
51
|
+
Finder.index.field_infos[:file].boost.should > 1.0
|
52
|
+
Finder.index.field_infos[:filetype].boost.should > 1.0
|
48
53
|
end
|
49
54
|
|
50
55
|
it "should also index unreadable files with known mimetypes" do
|
@@ -85,7 +90,7 @@ describe Finder do
|
|
85
90
|
|
86
91
|
it "should find documents according to their modification date" do
|
87
92
|
Finder.new("date:<1982").matching_documents.should be_empty
|
88
|
-
Finder.new("19831209").matching_document.basename.should == "office2003-word-template"
|
93
|
+
Finder.new("19831209*").matching_document.basename.should == "office2003-word-template"
|
89
94
|
Finder.new("date:<1983").matching_document.filename.should == "basic.pdf"
|
90
95
|
Finder.new("date:>=1989 AND date:<=1992").matching_document.filename.should == "placeholder.txt"
|
91
96
|
end
|
@@ -195,6 +200,22 @@ describe Finder do
|
|
195
200
|
Finder.new("Thumbs.db").matching_documents.should be_empty
|
196
201
|
Finder.new("filetype:db").matching_documents.should_not be_empty
|
197
202
|
end
|
203
|
+
|
204
|
+
it "should keep content cached" do
|
205
|
+
filename = "spec/test_dirs/indexed/others/placeholder.txt"
|
206
|
+
content_before = "Absorption and Adsorption cooling machines!!!"
|
207
|
+
some_doc=Document.new(filename)
|
208
|
+
some_doc.content.should == content_before
|
209
|
+
File.open(filename,'a'){|doc|
|
210
|
+
doc.write("This line should not be indexed. It shouldn't be found in cache")
|
211
|
+
}
|
212
|
+
some_doc.content.should_not == content_before
|
213
|
+
some_doc.cached.should == content_before
|
214
|
+
end
|
215
|
+
|
216
|
+
after(:all) do
|
217
|
+
revert_changes!("spec/test_dirs/indexed/others/placeholder.txt","Absorption and Adsorption cooling machines!!!")
|
218
|
+
end
|
198
219
|
|
199
220
|
# Not sure about this spec!
|
200
221
|
# English, or German?
|
@@ -7,17 +7,23 @@ describe "Host indexing system" do
|
|
7
7
|
end
|
8
8
|
end
|
9
9
|
|
10
|
-
it "should know which IP addresses are allowed (config/white_list_ip.yml)" do
|
11
|
-
File.should be_readable('config/white_list_ip.yml')
|
10
|
+
it "should know which IP addresses are allowed (config/custom/white_list_ip.yml)" do
|
11
|
+
File.should be_readable('config/custom/white_list_ip.yml')
|
12
12
|
end
|
13
13
|
|
14
|
-
it "should know which directories are to be indexed (config/indexed_directories.yml)" do
|
15
|
-
File.should be_readable('config/indexed_directories.yml')
|
14
|
+
it "should know which directories are to be indexed (config/custom/indexed_directories.yml)" do
|
15
|
+
File.should be_readable('config/custom/indexed_directories.yml')
|
16
16
|
end
|
17
17
|
|
18
|
-
# it would probably take ages to find a string whose hash == "picolena" :(
|
19
18
|
it "should be able to calculate base26 hash from strings" do
|
19
|
+
"test_dirs/indexed/010/decrepito.pdf".base26_hash(5).should == "rails"
|
20
|
+
"test_dirs/indexed/migrations/000_restreins.rb".base26_hash(5).should == "ricou"
|
21
|
+
# it would probably take ages to find a string whose hash == "picolena" :(
|
20
22
|
"test_dirs/indexed/1148/plots.odt".base26_hash(8).should == "picolehn"
|
21
|
-
"whatever.pdf".base26_hash.should == "bbuxhynait"
|
23
|
+
"whatever.pdf".base26_hash(10).should == "bbuxhynait"
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should not use too small a hash for Document#probably_unique_id" do
|
27
|
+
HashLength.should_not < 10
|
22
28
|
end
|
23
29
|
end
|
data/lib/picolena/version.rb
CHANGED
data/lib/rubigen_ext.rb
CHANGED
data/tasks/hack.rake
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
desc 'Create development picolena structure inside lib/picolena/templates'
|
2
|
+
task :lets_hack do
|
3
|
+
picolena_root=File.join(File.dirname(__FILE__),'..')
|
4
|
+
Dir.chdir(picolena_root){
|
5
|
+
system("ruby bin/picolena lib/picolena/templates/spec/test_dirs --skip --no-index --no-spec --destination=lib/picolena/templates")
|
6
|
+
}
|
7
|
+
puts <<-EXPLAIN
|
8
|
+
|
9
|
+
You now have a complete picolena installation in:
|
10
|
+
#{File.expand_path(File.join(File.dirname(__FILE__),'../lib/picolena/templates'))}
|
11
|
+
|
12
|
+
You can now hack and submit patches!
|
13
|
+
|
14
|
+
Once done, you can remove those files by typing:
|
15
|
+
rake clean
|
16
|
+
EXPLAIN
|
17
|
+
end
|
data/tasks/rspec.rake
CHANGED
@@ -1,20 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
end
|
7
|
-
begin
|
8
|
-
require 'spec/rake/spectask'
|
9
|
-
rescue LoadError
|
10
|
-
puts <<-EOS
|
11
|
-
To use rspec for testing you must install rspec gem:
|
12
|
-
gem install rspec
|
13
|
-
EOS
|
14
|
-
exit(0)
|
15
|
-
end
|
16
|
-
|
17
|
-
desc "Run the specs under spec/models"
|
18
|
-
Spec::Rake::SpecTask.new do |t|
|
19
|
-
t.spec_files = FileList['spec/**/*_spec.rb']
|
20
|
-
end
|
1
|
+
desc 'Create a temporary picolena structure and launch specs from it'
|
2
|
+
task :spec => :clean do
|
3
|
+
require 'pathname'
|
4
|
+
picolena_bin=Pathname(File.join(File.dirname(__FILE__),'../bin/picolena')).realpath.to_s
|
5
|
+
system("ruby #{picolena_bin} --spec-only")
|
6
|
+
end
|
data/tasks/test.rake
ADDED
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>Picolena</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/picolena"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.0</a>
|
36
|
+
<a href="http://rubyforge.org/projects/picolena" class="numbers">0.1.1</a>
|
37
37
|
</div>
|
38
38
|
<h1>→ ‘picolena’</h1>
|
39
39
|
|
@@ -114,7 +114,7 @@ ruby script/server</code></pre>
|
|
114
114
|
|
115
115
|
<p>Comments are welcome. Send an email to <a href="mailto:eric_duminil@rubyforge.org">Eric Duminil</a> email via the <a href="http://groups.google.com/group/picolena">forum</a></p>
|
116
116
|
<p class="coda">
|
117
|
-
<a href="eric_duminil@rubyforge.org">Eric DUMINIL</a>,
|
117
|
+
<a href="eric_duminil@rubyforge.org">Eric DUMINIL</a>, 12th April 2008<br>
|
118
118
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>,
|
119
119
|
by Daniel Cadenas via <a href="http://depgraph.rubyforge.org/">DepGraph</a>
|
120
120
|
</p>
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picolena
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Duminil
|
@@ -30,7 +30,7 @@ cert_chain:
|
|
30
30
|
qvI9FgPZ1QTG5uZAlBbk6d6JU2XfpA==
|
31
31
|
-----END CERTIFICATE-----
|
32
32
|
|
33
|
-
date: 2008-04-
|
33
|
+
date: 2008-04-12 00:00:00 +02:00
|
34
34
|
default_executable:
|
35
35
|
dependencies:
|
36
36
|
- !ruby/object:Gem::Dependency
|
@@ -87,6 +87,15 @@ dependencies:
|
|
87
87
|
- !ruby/object:Gem::Version
|
88
88
|
version: 1.1.0
|
89
89
|
version:
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
name: rspec
|
92
|
+
version_requirement:
|
93
|
+
version_requirements: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: 1.1.3
|
98
|
+
version:
|
90
99
|
description: Picolena is a lightweight ferret-powered documents search engine written in Ruby on rails
|
91
100
|
email:
|
92
101
|
- eric_duminil@rubyforge.org
|
@@ -129,10 +138,13 @@ files:
|
|
129
138
|
- README.txt
|
130
139
|
- Rakefile
|
131
140
|
- bin/picolena
|
141
|
+
- config/files_to_clean
|
132
142
|
- config/hoe.rb
|
133
143
|
- config/requirements.rb
|
134
144
|
- lib/picolena/USAGE
|
145
|
+
- lib/picolena/config/basic.rb
|
135
146
|
- lib/picolena/config/indexed_directories.yml
|
147
|
+
- lib/picolena/config/title_and_names_and_links.yml
|
136
148
|
- lib/picolena/config/white_list_ip.yml
|
137
149
|
- lib/picolena/picolena_generator.rb
|
138
150
|
- lib/picolena/templates/MIT-LICENSE
|
@@ -144,19 +156,21 @@ files:
|
|
144
156
|
- lib/picolena/templates/app/models/document.rb
|
145
157
|
- lib/picolena/templates/app/models/finder.rb
|
146
158
|
- lib/picolena/templates/app/views/documents/_document.html.haml
|
159
|
+
- lib/picolena/templates/app/views/documents/cached.html.haml
|
160
|
+
- lib/picolena/templates/app/views/documents/content.html.haml
|
147
161
|
- lib/picolena/templates/app/views/documents/index.html.haml
|
148
162
|
- lib/picolena/templates/app/views/documents/show.html.haml
|
149
|
-
- lib/picolena/templates/app/views/documents/show_content.html.haml
|
150
163
|
- lib/picolena/templates/app/views/layouts/application.html.haml
|
151
164
|
- lib/picolena/templates/config/boot.rb
|
152
165
|
- lib/picolena/templates/config/environment.rb
|
153
166
|
- lib/picolena/templates/config/environments/development.rb
|
154
167
|
- lib/picolena/templates/config/environments/production.rb
|
155
168
|
- lib/picolena/templates/config/environments/test.rb
|
156
|
-
- lib/picolena/templates/config/initializers/001_prepare_and_load_custom_config.rb
|
157
|
-
- lib/picolena/templates/config/initializers/002_indexed_dirs.rb
|
158
|
-
- lib/picolena/templates/config/initializers/
|
169
|
+
- lib/picolena/templates/config/initializers/001_load_custom_config.rb
|
170
|
+
- lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb
|
171
|
+
- lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb
|
159
172
|
- lib/picolena/templates/config/initializers/004_load_filters.rb
|
173
|
+
- lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb
|
160
174
|
- lib/picolena/templates/config/routes.rb
|
161
175
|
- lib/picolena/templates/lang/ui/de.yml
|
162
176
|
- lib/picolena/templates/lang/ui/en.yml
|
@@ -890,13 +904,13 @@ files:
|
|
890
904
|
- script/generate
|
891
905
|
- script/txt2html
|
892
906
|
- setup.rb
|
893
|
-
- spec/picolena_spec.rb
|
894
|
-
- tasks/default.rake
|
895
907
|
- tasks/deployment.rake
|
896
908
|
- tasks/environment.rake
|
909
|
+
- tasks/hack.rake
|
897
910
|
- tasks/rdoc.rake
|
898
911
|
- tasks/ri.rake
|
899
912
|
- tasks/rspec.rake
|
913
|
+
- tasks/test.rake
|
900
914
|
- tasks/website.rake
|
901
915
|
- website/index.html
|
902
916
|
- website/index.txt
|
@@ -931,5 +945,5 @@ rubygems_version: 1.1.0
|
|
931
945
|
signing_key:
|
932
946
|
specification_version: 2
|
933
947
|
summary: Picolena is a lightweight ferret-powered documents search engine written in Ruby on rails
|
934
|
-
test_files:
|
935
|
-
|
948
|
+
test_files: []
|
949
|
+
|
metadata.gz.sig
CHANGED
Binary file
|
@@ -1,8 +0,0 @@
|
|
1
|
-
#Deny all, Allow only IPs described in config/white_list_ip.yml
|
2
|
-
white_list_ip_config_file=['config/white_list_ip.yml', 'config/white_list_ip.yml.template'].find{|f| File.file?(f)}
|
3
|
-
WhiteListIPs=Regexp.new(
|
4
|
-
"^("<<
|
5
|
-
YAML.load_file(white_list_ip_config_file)["Allow"].collect{|ip|
|
6
|
-
ip.downcase.include?("all") ? /.*/ : Regexp.escape(ip)
|
7
|
-
}.join("|")<<")"
|
8
|
-
) rescue /^(127\.0\.0\.1|0\.0\.0\.0)/
|
data/spec/picolena_spec.rb
DELETED
data/tasks/default.rake
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
Rake::Task[:default].prerequisites.clear
|