pms 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog ADDED
@@ -0,0 +1,5 @@
1
+ = Revision history for pms
2
+
3
+ == 0.0.1 [2008-12-06]
4
+
5
+ * Birthday :-)
data/README ADDED
@@ -0,0 +1,71 @@
1
+ = pms - Poor Man's Search
2
+
3
+ == VERSION
4
+
5
+ This documentation refers to pms version 0.0.1
6
+
7
+
8
+ == DESCRIPTION
9
+
10
+ Provides a simple searching facility for (nearly) arbitrary input. It allows
11
+ searching by Strings (exact, case-insensitive token match) or Regexps (full control, but slower).
12
+ Boolean operators AND, OR, and NOT are supported, as well as proximity
13
+ operators NEAR (with configurable distance) and ADJACENT (taking order into
14
+ account). You can chain operators and also group them into sub-queries.
15
+
16
+ Inspiration came from a discussion on ruby-talk, starting with message
17
+ ruby-talk:322014[http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/322014],
18
+ but PMS has a more document-centric attitude (everything that <tt>input#each</tt>
19
+ yields is considered a document of its own).
20
+
21
+ Example:
22
+
23
+ require 'pms/ext'
24
+
25
+ text = <<EOT
26
+ Hello world, how are you today? I said "Hello"
27
+ to the other guy but he would not answer although
28
+ all the world could hear me.
29
+ EOT
30
+
31
+ search = text.search('hello').near('world', 3)
32
+
33
+ p search.results
34
+ #=> [0]
35
+
36
+ p search.results_with_positions
37
+ #=> {0=>[0, 8]}
38
+
39
+ p search.matches
40
+ #=> ["Hello world, how are you today? I said \"Hello\"\n"]
41
+
42
+ You can find more examples in the <tt>spec/</tt> directory.
43
+
44
+
45
+ == LINKS
46
+
47
+ * <http://pms.rubyforge.org/>
48
+ * <http://rubyforge.org/projects/pms>
49
+ * <http://github.com/blackwinter/pms>
50
+
51
+
52
+ == AUTHORS
53
+
54
+ * Jens Wille <mailto:jens.wille@uni-koeln.de>
55
+
56
+
57
+ == LICENSE AND COPYRIGHT
58
+
59
+ Copyright (C) 2008 Jens Wille
60
+
61
+ pms is free software: you can redistribute it and/or modify it under the
62
+ terms of the GNU General Public License as published by the Free Software
63
+ Foundation, either version 3 of the License, or (at your option) any later
64
+ version.
65
+
66
+ pms is distributed in the hope that it will be useful, but WITHOUT ANY
67
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
68
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
69
+
70
+ You should have received a copy of the GNU General Public License along with
71
+ pms. If not, see <http://www.gnu.org/licenses/>.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
# Load PMS::VERSION relative to this Rakefile. A bare 'lib/pms/version'
# require only works while the current directory is on the load path, which
# is no longer the case on Ruby >= 1.9.2.
require File.expand_path('../lib/pms/version', __FILE__)

begin
  require 'hen'

  # The block passed to Hen.lay! returns a Hash with the RubyForge and gem
  # settings for this project.
  # NOTE(review): how 'hen' interprets the individual keys is not visible
  # here -- consult the hen documentation before changing them.
  Hen.lay! {{
    :rubyforge => {
      :project  => %q{pms},
      :package  => %q{pms},
      :rdoc_dir => nil
    },

    :gem => {
      :version      => PMS::VERSION,
      :summary      => %q{Poor Man's Search},
      :homepage     => %q{http://pms.rubyforge.org/},
      :files        => FileList['lib/**/*.rb'].to_a,
      :extra_files  => FileList['[A-Z]*', 'spec/**/*.rb', 'test_data/**/*'].to_a,
      :dependencies => %w[]
    }
  }}
rescue LoadError
  # Only the 'hen' require sits inside this begin block.
  abort "Please install the 'hen' gem first."
end

### Place your custom Rake tasks here.
data/lib/pms.rb ADDED
@@ -0,0 +1,51 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # pms - Poor Man's Search. #
5
+ # #
6
+ # Copyright (C) 2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <jens.wille@uni-koeln.de> #
10
+ # #
11
+ # pms is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU General Public License as published by the Free Software #
13
+ # Foundation; either version 3 of the License, or (at your option) any later #
14
+ # version. #
15
+ # #
16
+ # pms is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with pms. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'pms/index'
28
+ require 'pms/proxy'
29
+
30
# Entry point of the library: wraps an input object together with the
# PMS::Index built from it and hands out proxy objects for querying.
class PMS

  # The object that was passed to ::new and the PMS::Index built over it.
  attr_reader :input, :index

  # Stores +input+ and indexes it right away via PMS::Index.new.
  def initialize(input)
    @input, @index = input, Index.new(input)
  end

  # Starts a query.
  #
  # With a +token+, returns a PMS::TokenProxy for that token. Without one,
  # a block is required: it is called through PMS::Proxy#and and its
  # arguments are passed along unchanged.
  def search(token = nil)
    return TokenProxy.new(self, token) if token

    Proxy.new(self).and { |*args| yield(*args) }
  end

  # Delegates to the index's +entries+.
  def results
    index.entries
  end

  # Delegates to the index's +matches+ (called without arguments).
  def matches
    index.matches
  end

end
data/lib/pms/ext.rb ADDED
@@ -0,0 +1,49 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of pms, Poor Man's Search. #
5
+ # #
6
+ # Copyright (C) 2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <jens.wille@uni-koeln.de> #
10
+ # #
11
+ # pms is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU General Public License as published by the Free Software #
13
+ # Foundation; either version 3 of the License, or (at your option) any later #
14
+ # version. #
15
+ # #
16
+ # pms is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with pms. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'pms'
28
+
29
class PMS

  # Convenience mixin that adds a +search+ method to the classes listed in
  # RECEIVERS, so that such objects can be queried directly.
  module Ext

    # Classes that get this module included when the file is loaded.
    RECEIVERS = [String, IO, Array].freeze

    # Wraps the receiver in a new PMS object and forwards all arguments,
    # including a block, to PMS#search. Forwarding the block is required
    # for the token-less (sub-query) form of PMS#search to be usable here.
    def search(*args, &block)
      PMS.new(self).search(*args, &block)
    end

    # Mix the module into every receiver class.
    RECEIVERS.each { |klass|
      klass.send(:include, self)
    }

  end

end
42
+
43
class File

  # Opens +file+ (a path, or any object responding to +path+) and calls
  # +search+ on the opened file with the remaining arguments and the block.
  # Returns whatever that +search+ call returns.
  #
  # NOTE(review): the file is closed again when this method returns, so
  # anything on the result that reads the input lazily afterwards will be
  # operating on a closed file -- verify against the intended usage.
  def self.search(file, *args, &block)
    path = file.respond_to?(:path) ? file.path : file
    File.open(path) { |f| f.search(*args, &block) }
  end

end
data/lib/pms/index.rb ADDED
@@ -0,0 +1,112 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of pms, Poor Man's Search. #
5
+ # #
6
+ # Copyright (C) 2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <jens.wille@uni-koeln.de> #
10
+ # #
11
+ # pms is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU General Public License as published by the Free Software #
13
+ # Foundation; either version 3 of the License, or (at your option) any later #
14
+ # version. #
15
+ # #
16
+ # pms is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with pms. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ class PMS
28
+
29
+ class Index
30
+
31
+ TOKEN_RE = %r{\w+}o
32
+
33
+ attr_reader :input, :index, :entries
34
+
35
+ def initialize(input)
36
+ raise ArgumentError, "input must implement #each" unless input.respond_to?(:each)
37
+
38
+ @input = input
39
+ @index = Hash.new { |h, k| h[k] = Hash.new { |i, j| i[j] = [] } }
40
+
41
+ build_index
42
+ end
43
+
44
+ def doc_nums_with_positions(token)
45
+ case token
46
+ when String
47
+ index[mangle_token(token)]
48
+ when Regexp
49
+ res = {}
50
+
51
+ index.each { |key, value|
52
+ res.update(value) { |_, old, new| old | new } if key =~ token
53
+ }
54
+
55
+ res
56
+ else
57
+ raise TypeError, "expected String or Regexp, got #{token.class}"
58
+ end
59
+ end
60
+
61
+ alias_method :results_with_positions, :doc_nums_with_positions
62
+
63
+ def doc_nums(token)
64
+ doc_nums_with_positions(token).keys
65
+ end
66
+
67
+ alias_method :results, :doc_nums
68
+
69
+ def documents(doc_nums = default = Object.new)
70
+ @documents ||= get_documents
71
+ default ? @documents : doc_nums.map { |doc_num| @documents[doc_num] }
72
+ end
73
+
74
+ alias_method :matches, :documents
75
+
76
+ def doc(doc_num)
77
+ documents([doc_num]).first
78
+ end
79
+
80
+ alias_method :[], :doc
81
+
82
+ private
83
+
84
+ def build_index
85
+ @documents, @entries = nil, []
86
+ doc_num = -1
87
+
88
+ input.each { |doc|
89
+ @entries << doc_num += 1
90
+ pos = -1
91
+
92
+ doc.scan(TOKEN_RE) { |token|
93
+ index[mangle_token(token)][doc_num] << pos += 1
94
+ }
95
+ }
96
+ end
97
+
98
+ def get_documents
99
+ input.rewind if input.respond_to?(:rewind)
100
+
101
+ docs = []
102
+ input.each { |doc| docs << doc }
103
+ docs
104
+ end
105
+
106
+ def mangle_token(token)
107
+ token.downcase
108
+ end
109
+
110
+ end
111
+
112
+ end
data/lib/pms/proxy.rb ADDED
@@ -0,0 +1,133 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # A component of pms, Poor Man's Search. #
5
+ # #
6
+ # Copyright (C) 2008 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <jens.wille@uni-koeln.de> #
10
+ # #
11
+ # pms is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU General Public License as published by the Free Software #
13
+ # Foundation; either version 3 of the License, or (at your option) any later #
14
+ # version. #
15
+ # #
16
+ # pms is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more #
19
+ # details. #
20
+ # #
21
+ # You should have received a copy of the GNU General Public License along #
22
+ # with pms. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
class PMS

  # Holds the current result set (an Array of document numbers) of a query
  # and narrows or widens it with boolean operators. Every operator returns
  # the proxy itself, so calls can be chained.
  class Proxy

    # +pms+::     the PMS object this proxy was created for
    # +index+::   that object's index
    # +results+:: the current result set
    attr_reader :pms, :index, :results

    # Starts out with the result set reported by +pms+.
    def initialize(pms)
      @pms     = pms
      @index   = pms.index
      @results = pms.results
    end

    # Keeps only documents that also match +token+ or, without a token,
    # the sub-query returned by the block.
    def and(token = nil)
      if token
        apply_operator_with_token('and', token)
      else
        apply_operator_with_block('and') { |*a| yield(*a) }
      end
    end

    # Adds documents that match +token+ or, without a token, the sub-query
    # returned by the block.
    def or(token = nil)
      if token
        apply_operator_with_token('or', token)
      else
        apply_operator_with_block('or') { |*a| yield(*a) }
      end
    end

    # Removes documents that match +token+ or, without a token, the
    # sub-query returned by the block.
    def not(token = nil)
      if token
        apply_operator_with_token('not', token)
      else
        apply_operator_with_block('not') { |*a| yield(*a) }
      end
    end

    # Returns the documents belonging to the current result set.
    def matches
      index.matches(results)
    end

    private

    # Applies +op+ with the documents the index reports for +token+.
    def apply_operator_with_token(op, token)
      apply_operator(op, index.results(token))
    end

    # Yields +pms+ to the block, which must return a Proxy (the sub-query),
    # and applies +op+ with that proxy's results.
    #
    # Raises RuntimeError if the block returns anything else.
    def apply_operator_with_block(op)
      case sub = yield(pms)
      when Proxy
        apply_operator(op, sub.results)
      else
        raise "sub-query must return a PMS::Proxy object (got #{sub.class})"
      end
    end

    # Combines the current results with +doc_nums+ using set intersection
    # ('and'), union ('or') or difference ('not').
    #
    # Raises ArgumentError for an unknown operator.
    def apply_operator(op, doc_nums)
      case op = op.to_s.downcase
      when 'and'
        @results &= doc_nums
      when 'or'
        @results |= doc_nums
      when 'not'
        @results -= doc_nums
      else
        raise ArgumentError, "invalid operator '#{op}'"
      end

      self # allow chaining!
    end

  end

  # A Proxy that starts from the documents matching a single token and
  # additionally knows the token's positions, which enables the proximity
  # operators +near+ and +adjacent+.
  class TokenProxy < Proxy

    # +token+::                  the token this proxy was created for
    # +results_with_positions+:: Hash of doc_num => [positions] for it
    attr_reader :token, :results_with_positions

    # Looks up +token+ in the index of +pms+ and uses the matching
    # documents as the initial result set.
    def initialize(pms, token)
      super(pms)

      @token = token

      @results_with_positions = index.results_with_positions(token)
      @results = @results_with_positions.keys
    end

    # Keeps only documents in which +token+ occurs at most +distance+
    # positions away from an occurrence of this proxy's token. If +order+
    # is true, +token+ must not come before this proxy's token.
    def near(token, distance = 1, order = false)
      results1 = results_with_positions
      results2 = index.results_with_positions(token)

      doc_nums = (results1.keys & results2.keys).select { |doc_num|
        positions = results2[doc_num]

        results1[doc_num].any? { |pos1|
          positions.any? { |pos2|
            diff = pos2 - pos1

            # An occurrence in front of pos1 only rules out itself; later
            # occurrences may still qualify, so skip it instead of aborting
            # the search.
            next false if order && diff < 0

            diff.abs <= distance
          }
        }
      }

      apply_operator('and', doc_nums)
    end

    # Like +near+, but taking order into account: +token+ has to follow
    # this proxy's token within +distance+ positions.
    def adjacent(token, distance = 1)
      near(token, distance, true)
    end

  end

end