ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
@@ -1,47 +0,0 @@
|
|
1
|
-
# The namespace is opened in nested form (instead of +module Ferret::Utils+)
# so that this file can be loaded even if Ferret has not been defined yet.

module Ferret
  module Utils
    # Methods for manipulating strings.
    module StringHelper
      # A minimal, read-only cursor over an in-memory string.
      class StringReader
        # Length of the wrapped string, captured when the reader was created.
        attr_reader :length

        # str:: The string to read from.
        def initialize(str)
          @str = str
          @pointer = 0
          @length = @str.length
        end

        # Reads from the wrapped string.
        #
        # len:: Number of characters to read. When omitted (nil) the whole
        #       string is returned and the read position is left untouched.
        # returns:: The next +len+ characters (possibly fewer, or "" when the
        #           position is exactly at the end), or nil once the position
        #           has moved past the end or the reader has been closed.
        def read(len = nil)
          return @str if len.nil?
          # A closed reader has no string left to slice.
          return nil if @str.nil? || @pointer > @length

          chunk = @str[@pointer, len]
          # The position advances by the requested length, not by chunk.length.
          @pointer += len
          chunk
        end

        # Moves the read position back to the start of the string.
        def reset
          @pointer = 0
        end

        # Releases the wrapped string. The original assigned a throw-away local
        # (+str = nil+) and therefore released nothing.
        def close
          @str = nil
        end
      end

      # Compares two strings, character by character, and returns the
      # first position where the two strings differ from one another.
      # eg.
      #   string_difference('dustbin', 'dusty') # => 4
      #   string_difference('dustbin', 'evening') # => 0
      #   string_difference('eve', 'evening') # => 3
      #
      # s1:: The first string to compare
      # s2:: The second string to compare
      # returns:: The first position where the two strings differ; the length
      #           of the shorter string when one is a prefix of the other.
      def StringHelper.string_difference(s1, s2)
        limit = [s1.length, s2.length].min
        limit.times { |i| return i if s1[i] != s2[i] }
        limit
      end
    end
  end
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
require 'thread'
|
2
|
-
# Extends Thread with a small per-thread store keyed by object identity
# (+key.object_id+). A finalizer is registered on every key so that its
# entry is dropped when the key object is garbage collected.
class Thread
  # Builds the finalizer callback; it receives the object id of a collected
  # key and removes the matching entry from this thread's cache.
  def make_deleter
    ->(id) { @ferret_cache.delete(id) }
  end

  # Set the local value for the thread
  #
  # key::     Object whose identity names the entry.
  # value::   Value to store.
  # returns:: The stored value.
  def set_local(key, value)
    @ferret_cache ||= {}
    @del ||= make_deleter
    ObjectSpace.define_finalizer(key, @del)
    @ferret_cache[key.object_id] = value
  end

  # Get the local value for the thread (nil when nothing is stored for key)
  def get_local(key)
    @ferret_cache ||= {}
    @ferret_cache[key.object_id]
  end

  # Returns the number of local variables stored. Useful for testing.
  def local_size
    @ferret_cache ||= {}
    @ferret_cache.size
  end

  # Removes every value stored for this thread.
  def clear_local
    @ferret_cache ||= {}
    @ferret_cache.clear
  end
end
|
@@ -1,60 +0,0 @@
|
|
1
|
-
require 'weakref'
require 'monitor'

# The namespace is opened in nested form (instead of +module Ferret::Utils+)
# so that this file can be loaded even if Ferret has not been defined yet.

module Ferret
  module Utils
    # This class implements a weak key hash. ie all keys that are stored in this
    # hash can still be garbage collected, and if they are garbage collected
    # then the key and its corresponding value will be deleted from the hash.
    #
    # Entries are stored under +key.object_id+, so a lookup only succeeds with
    # the very same object that was used to store the value; an equal but
    # distinct object does not match.
    # eg.
    #   name = "david"
    #   last_names = WeakKeyHash.new()
    #   last_names[name] = "balmain"
    #   puts last_names[name]     #=>"balmain"
    #   puts last_names["david"]  #=>nil, a different String object
    #   GC.start
    #   puts last_names[name]     #=>"balmain"
    #   name = nil
    #   GC.start
    #   # once the string formerly held in name has been garbage collected
    #   # its entry is removed from the hash
    #   puts last_names.size      #=>0
    #
    # WeakKeyHash subclasses Monitor so it can be synchronized on.
    #
    # === NOTE
    # Unfortunately the ruby garbage collector is not always predictable so your
    # results may differ but each key should eventually be freed when all other
    # references have been removed and the garbage collector is ready.
    class WeakKeyHash < Monitor
      # Create a new WeakKeyHash.
      def initialize
        super()
        @hash = {}
        # Finalizer callback: receives the object id of a collected key.
        @deleter = lambda { |id| @hash.delete(id) }
      end

      # Set the value for the key just like a Hash
      def []=(key, value)
        ObjectSpace.define_finalizer(key, @deleter)
        @hash[key.object_id] = value
      end

      # Get the value for the key
      def [](key)
        @hash[key.object_id]
      end

      # Return the number of elements in the Hash
      def size
        @hash.size
      end

      # Return a string representation of the WeakKeyHash, one
      # "<key=>value>" group per entry. Entries whose key object no longer
      # exists (collected, finalizer not yet run) are skipped instead of
      # letting ObjectSpace._id2ref raise RangeError.
      def to_s
        buffer = ''.dup
        @hash.each_pair do |id, value|
          begin
            key = ObjectSpace._id2ref(id)
          rescue RangeError
            next
          end
          buffer << "<#{key}=>#{value}>"
        end
        buffer
      end
    end
  end
end
|
data/lib/rferret.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
# Append the directory containing this file to Ruby's load path ($:).
$: << File.dirname(__FILE__)
|
2
|
-
#--
|
3
|
-
# Copyright (c) 2005 David Balmain
|
4
|
-
#
|
5
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
-
# a copy of this software and associated documentation files (the
|
7
|
-
# "Software"), to deal in the Software without restriction, including
|
8
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
# the following conditions:
|
12
|
-
#
|
13
|
-
# The above copyright notice and this permission notice shall be
|
14
|
-
# included in all copies or substantial portions of the Software.
|
15
|
-
#
|
16
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
-
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
-
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
-
#++
|
24
|
-
# :include: ../TUTORIAL
|
25
|
-
module Ferret
  # Version string of this Ferret release. Frozen so that the shared constant
  # cannot be modified in place.
  VERSION = '0.9.6'.freeze
end
|
28
|
-
|
29
|
-
# Global flag set to true by this entry point.
# NOTE(review): presumably read elsewhere to select the pure-Ruby
# implementation over a native one - confirm against the readers of this flag.
$ferret_pure_ruby = true
|
30
|
-
require 'ferret/utils'
|
31
|
-
require 'ferret/document'
|
32
|
-
require 'ferret/stemmers'
|
33
|
-
require 'ferret/analysis'
|
34
|
-
require 'ferret/store'
|
35
|
-
require 'ferret/index'
|
36
|
-
require 'ferret/search'
|
37
|
-
require 'ferret/query_parser'
|
@@ -1,106 +0,0 @@
|
|
1
|
-
# This code was taken from rails and is under the same license as ferret.
|
2
|
-
# Thanks go to David Heinemeier Hansson
|
3
|
-
# Collects simple line/class/method counts for one or more directory trees
# and prints them as a text table.
#
# Each constructor argument is a [label, directory] pair, e.g.
#   CodeStatistics.new(["Code", "lib"], ["Units", "test/unit"]).to_s
class CodeStatistics
  # Labels that are treated as test code rather than application code.
  TEST_TYPES = ['Units', 'Functionals'].freeze

  # pairs:: Any number of [label, directory] pairs. The statistics are
  #         gathered immediately; a total is only computed for 2+ pairs.
  def initialize(*pairs)
    @pairs = pairs
    @statistics = calculate_statistics
    @total = calculate_total if pairs.length > 1
  end

  # Despite its name this method prints the report to standard output; it
  # returns the result of the final +puts+ rather than a string.
  def to_s
    print_header
    @pairs.each { |pair| print_line(pair.first, @statistics[pair.first]) }
    print_splitter

    if @total
      print_line("Total", @total)
      print_splitter
    end

    print_code_test_stats
  end

  private

  # Builds a hash mapping each label to the statistics of its directory.
  def calculate_statistics
    @pairs.each_with_object({}) do |pair, stats|
      stats[pair.first] = calculate_directory_statistics(pair.last)
    end
  end

  # Recursively counts lines, code lines, classes and methods.
  #
  # directory:: Directory to scan. Sub-directories whose name does not start
  #             with a dot are scanned as well.
  # pattern::   Only plain files whose name matches are counted.
  # returns::   Hash with the keys "lines", "codelines", "classes", "methods".
  def calculate_directory_statistics(directory, pattern = /.*\.rb$/)
    stats = { "lines" => 0, "codelines" => 0, "classes" => 0, "methods" => 0 }

    Dir.foreach(directory) do |file_name|
      path = File.join(directory, file_name)

      # File.directory? returns false for dangling symlinks instead of raising.
      if File.directory?(path)
        unless file_name.start_with?(".")
          nested = calculate_directory_statistics(path, pattern)
          stats.each_key { |key| stats[key] += nested[key] }
        end
        # A directory is never read as a file, even if its name matches.
        next
      end

      next unless file_name =~ pattern

      # File.foreach closes the file once the block has finished.
      File.foreach(path) do |line|
        stats["lines"] += 1
        stats["classes"] += 1 if line =~ /class [A-Z]/
        stats["methods"] += 1 if line =~ /def [a-z]/
        stats["codelines"] += 1 unless line =~ /^\s*$/ || line =~ /^\s*#/
      end
    end

    stats
  end

  # Sums the per-label statistics into one hash.
  def calculate_total
    total = { "lines" => 0, "codelines" => 0, "classes" => 0, "methods" => 0 }
    @statistics.each_value do |stats|
      stats.each { |key, count| total[key] += count }
    end
    total
  end

  # Code lines of every label that is not a test type.
  def calculate_code
    code_loc = 0
    @statistics.each { |label, stats| code_loc += stats['codelines'] unless TEST_TYPES.include?(label) }
    code_loc
  end

  # Code lines of every label that is a test type.
  def calculate_tests
    test_loc = 0
    @statistics.each { |label, stats| test_loc += stats['codelines'] if TEST_TYPES.include?(label) }
    test_loc
  end

  # Prints the column titles, padded to the same widths as the data rows.
  def print_header
    print_splitter
    puts "| #{'Name'.ljust(20)} " \
         "| #{'Lines'.rjust(5)} " \
         "| #{'LOC'.rjust(5)} " \
         "| #{'Classes'.rjust(7)} " \
         "| #{'Methods'.rjust(7)} " \
         "| #{'M/C'.rjust(3)} " \
         "| #{'LOC/M'.rjust(5)} |"
    print_splitter
  end

  def print_splitter
    puts "+----------------------+-------+-------+---------+---------+-----+-------+"
  end

  # Prints one table row.
  #
  # name::       Row label.
  # statistics:: Hash as returned by calculate_directory_statistics.
  def print_line(name, statistics)
    class_count = statistics["classes"]
    method_count = statistics["methods"]

    # Integer ratios; 0 when the divisor is 0 (previously an inline rescue).
    m_over_c = class_count.zero? ? 0 : method_count / class_count
    loc_over_m = method_count.zero? ? 0 : (statistics["codelines"] / method_count) - 2

    # Test groups are indented by two columns; both forms are 22 wide.
    start = if TEST_TYPES.include?(name)
      "|   #{name.ljust(18)} "
    else
      "| #{name.ljust(20)} "
    end

    puts start +
         "| #{statistics["lines"].to_s.rjust(5)} " +
         "| #{statistics["codelines"].to_s.rjust(5)} " +
         "| #{class_count.to_s.rjust(7)} " +
         "| #{method_count.to_s.rjust(7)} " +
         "| #{m_over_c.to_s.rjust(3)} " +
         "| #{loc_over_m.to_s.rjust(5)} |"
  end

  # Prints the code/test line counts and their ratio, then an empty line.
  def print_code_test_stats
    code = calculate_code
    tests = calculate_tests

    puts " Code LOC: #{code} Test LOC: #{tests} Code to Test Ratio: 1:#{sprintf("%.1f", tests.to_f/code)}"
    puts ""
  end
end
|
@@ -1,76 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
-
require 'benchmark'
|
3
|
-
|
4
|
-
# Benchmarks writing and re-reading values of each supported type through a
# Ferret::Store::RAMDirectory, asserting that every value read back equals
# the value written.
class RAMStoreTest < Test::Unit::TestCase
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  def teardown
    @dir.close
  end

  def test_rw_bytes
    bytes = [0x34, 0x87, 0xF9, 0xEA, 0x00, 0xFF]
    rw_test(bytes, "byte")
  end

  def test_rw_ints
    ints = [-2147483648, 2147483647, -1, 0]
    rw_test(ints, "int")
  end

  def test_rw_longs
    longs = [-9223372036854775808, 9223372036854775807, -1, 0]
    rw_test(longs, "long")
  end

  def test_rw_uints
    uints = [0xffffffff, 100000, 0]
    rw_test(uints, "uint")
  end

  def test_rw_ulongs
    ulongs = [0xffffffffffffffff, 100000000000000, 0]
    rw_test(ulongs, "ulong")
  end

  def test_rw_vints
    vints = [ 0xF8DC843342FE3484234987FE98AB987C897D214D123D123458EFBE2E238BACDEB9878790ABCDEF123DEF23988B89C,
              0x0000000000000000000000000000000000000000,
              0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF]
    rw_test(vints, "vint")
  end

  def test_rw_vlongs
    vlongs = [ 0xF8DC843342FE3484234987FE98AB987C897D214D123D123458EFBE2E238BACDEB9878790ABCDEF123DEF23988B89C,
               0x0000000000000000000000000000000000000000,
               0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF]
    rw_test(vlongs, "vlong")
  end

  def test_rw_strings
    strings = ['This is a ruby ferret test string ~!@#$%^&*()`123456790-=\)_+|', 'This is another string. I\'ll make this one a little longer than the last one. But I guess we need a few shorter ones too.', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten']
    rw_test(strings, "string")
  end

  # Shared driver for the tests above. Writes every value 1000 times with
  # write_<type> to "rw_<type>.test", then reads them back with read_<type>
  # and asserts equality, timing both phases with Benchmark.bmbm.
  #
  # values:: Values to write and read back.
  # type::   Suffix of the write_/read_ methods, also used in the file name.
  def rw_test(values, type)
    puts "\nrw_#{type} test"
    Benchmark.bmbm do |x|
      x.report("write") do
        ostream = @dir.create_output("rw_#{type}.test")
        begin
          1000.times { values.each { |b| ostream.__send__("write_" + type, b) } }
        ensure
          # Close the stream even when a write raises.
          ostream.close
        end
      end
      x.report("read") do
        istream = @dir.open_input("rw_#{type}.test")
        begin
          1000.times { values.each { |b| assert_equal(b, istream.__send__("read_" + type), "#{type} should be equal") } }
        ensure
          # Close the stream even when an assertion fails.
          istream.close
        end
      end
    end
  end
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
$:.unshift File.join(File.dirname(__FILE__), '../../lib')
|
2
|
-
|
3
|
-
require 'ferret'
|
4
|
-
|
5
|
-
# Micro-benchmark: repeatedly writes 300 vints to a file in an FSDirectory
# and reads them back, then prints the elapsed wall-clock time.
vints = [9223372036854775807, 0x00, 0xFFFFFFFFFFFFFFFF]
started_at = Time.new

1.upto(10) do
  dpath = File.join(File.dirname(__FILE__), 'fsdir')
  # NOTE(review): the second argument is presumably a "create" flag - confirm.
  dir = Ferret::Store::FSDirectory.new(dpath, true)

  1.upto(100) do
    ostream = dir.create_output("rw_vint.test")
    # Cycle through the three sample values until 300 have been written.
    vints.cycle.first(300).each { |value| ostream.write_vint(value) }
    ostream.close

    istream = dir.open_input("rw_vint.test")
    300.times { istream.read_vint }
    istream.close
  end

  dir.close
end

puts "took #{Time.new - started_at} seconds"
|
@@ -1,81 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../test_helper"
|
2
|
-
require File.dirname(__FILE__) + "/../utils/number_to_spoken.rb"
|
3
|
-
require 'thread'
|
4
|
-
|
5
|
-
# Exercises one shared Index from several threads at once; each thread
# performs a random mix of add, search, delete and optimize operations.
class IndexThreadSafetyTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Document

  # Not referenced by the code in this class; setup opens the path 'index2'.
  INDEX_DIR = File.expand_path(File.join(File.dirname(__FILE__), "index"))
  # Operations performed by each thread.
  ITERATIONS = 100
  # Number of concurrent threads.
  NUM_THREADS = 10
  ANALYZER = Ferret::Analysis::Analyzer.new

  def setup
    options = {
      :path => 'index2',
      :create => true,
      :analyzer => ANALYZER,
      :default_field => 'contents'
    }
    @index = Index.new(options)
  end

  # Body of each worker thread. The operation is picked from a random number:
  # above 0.98 optimize, above 0.9 delete, above 0.7 search, otherwise add.
  # Any error is printed, the shared index reference is cleared and the
  # error is re-raised.
  def indexing_thread
    ITERATIONS.times do
      choice = rand

      case
      when choice > 0.98 then do_optimize
      when choice > 0.9  then do_delete_doc
      when choice > 0.7  then do_search
      else                    do_add_doc
      end
    end
  rescue => e
    puts e
    puts e.backtrace
    @index = nil
    raise e
  end

  def do_optimize
    puts "Optimizing the index"
    @index.optimize
  end

  # Deletes a randomly chosen document number, if the index is not empty.
  def do_delete_doc
    if @index.size == 0
      return
    end

    doc_num = rand(@index.size)
    puts "Deleting #{doc_num} from index which has#{@index.has_deletions? ? "" : " no"} deletions"
    if @index.deleted?(doc_num)
      puts "document was already deleted"
    end
    @index.delete(doc_num)
  end

  # Adds a document whose "id" is a random number and whose "contents" is
  # that number passed through to_spoken.
  def do_add_doc
    n = rand(0xFFFFFFFF)
    doc = Document.new
    doc << Field.new("id", n.to_s, Field::Store::YES, Field::Index::UNTOKENIZED)
    doc << Field.new("contents", n.to_spoken, Field::Store::NO, Field::Index::TOKENIZED)
    puts("Adding #{n}")
    @index << doc
  end

  # Searches for the to_spoken form of a random number and prints each hit.
  def do_search
    n = rand(0xFFFFFFFF)
    puts("Searching for #{n}")
    hits = @index.search_each(n.to_spoken, :num_docs => 3) do |doc_id, score|
      puts "Hit for #{n}: #{@index[doc_id]["id"]} - #{score}"
    end
    puts("Searched for #{n}: total = #{hits}")
  end

  # Starts NUM_THREADS workers and waits for all of them to finish.
  def test_threading
    threads = []
    NUM_THREADS.times { threads << Thread.new { indexing_thread } }

    threads.each(&:join)
  end
end
|