greeb 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ greeb (0.0.2)
5
+ rspec (~> 2.4.0)
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ diff-lcs (1.1.2)
11
+ rspec (2.4.0)
12
+ rspec-core (~> 2.4.0)
13
+ rspec-expectations (~> 2.4.0)
14
+ rspec-mocks (~> 2.4.0)
15
+ rspec-core (2.4.0)
16
+ rspec-expectations (2.4.0)
17
+ diff-lcs (~> 1.1.2)
18
+ rspec-mocks (2.4.0)
19
+
20
+ PLATFORMS
21
+ ruby
22
+
23
+ DEPENDENCIES
24
+ greeb!
data/Rakefile CHANGED
@@ -2,3 +2,11 @@
2
2
 
3
3
  require 'bundler'
4
4
  Bundler::GemHelper.install_tasks
5
+
6
+ require 'rspec/core/rake_task'
7
+ desc 'Run all examples'
8
+ RSpec::Core::RakeTask.new(:spec) do |t|
9
+ t.rspec_opts = %w[--color]
10
+ end
11
+
12
+ task :default => :spec
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  $:.push File.expand_path('../lib', __FILE__)
4
- require 'greeb/version'
4
+ require 'greeb'
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'greeb'
@@ -11,11 +11,13 @@ Gem::Specification.new do |s|
11
11
  s.email = [ 'dmitry@eveel.ru' ]
12
12
  s.homepage = 'https://github.com/eveel/greeb'
13
13
  s.summary = 'Greeb is a Graphematical Analyzer.'
14
- s.description = 'Greeb is a Graphematical Analyzer, ' \
14
+ s.description = 'Greeb is awesome Graphematical Analyzer, ' \
15
15
  'written in Ruby.'
16
16
 
17
17
  s.rubyforge_project = 'greeb'
18
18
 
19
+ s.add_dependency 'rspec', '~> 2.4.0'
20
+
19
21
  s.files = `git ls-files`.split("\n")
20
22
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
21
23
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
@@ -1,7 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # Enumerable module additions.
4
+ #
3
5
  module Enumerable
4
- def collect_with_index(i = -1)
6
+ def collect_with_index(i = -1) # :nodoc:
5
7
  collect { |e| yield(e, i += 1) }
6
8
  end
7
9
  alias map_with_index collect_with_index
@@ -1,144 +1,11 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'meta_array'
4
- require 'enumerable'
5
-
3
+ # Greeb is an awesome Graphematical Analyzer.
4
+ #
6
5
  module Greeb
7
- RU_LEX = /^[А-Яа-я]+$/u
8
- EN_LEX = /^[A-Za-z]+$/u
9
- EOL = /^\n+$/u
10
- SEP = /^[*=_\/\\ ]$/u
11
- PUN = /^(\.|\!|\?)$/u
12
- SPUN = /^(\,|\[|\]|\(|\)|\-|:|;)$/u
13
- DIG = /^[0-9]+$/u
14
- DIL = /^[А-Яа-яA-Za-z0-9]+$/u
15
- EMPTY = ''
16
-
17
- class Parser
18
- attr_accessor :origin
19
- private :origin=
20
-
21
- attr_writer :tree
22
- private :tree=
23
-
24
- def initialize(origin)
25
- self.origin = origin
26
- end
27
-
28
- def tree
29
- @tree ||= parse(origin)
30
- end
31
-
32
- private
33
- def parse(origin) # :nodoc:
34
- tree = MetaArray.new
35
-
36
- # paragraph
37
- p_id = 0
38
-
39
- # sentence
40
- s_id = 0
41
-
42
- # subsentence
43
- ss_id = 0
44
-
45
- token = ''
6
+ # Version of the Greeb.
7
+ #
8
+ VERSION = "0.0.2"
46
9
 
47
- origin.each_char do |c|
48
- puts "[#{token.inspect}] ← #{c.inspect}"
49
- case c
50
- when EOL then begin
51
- case token
52
- when EMPTY then token << c
53
- when EOL then begin
54
- token = ''
55
- p_id += 1
56
- s_id = 0
57
- ss_id = 0
58
- end
59
- else
60
- tree[p_id][s_id][ss_id] << token
61
- token = c
62
- end
63
- end
64
- when SEP then begin
65
- case token
66
- when EMPTY
67
- else
68
- tree[p_id][s_id][ss_id] << token
69
- while tree[p_id][s_id][ss_id].last == c
70
- tree[p_id][s_id][ss_id].pop
71
- end
72
- tree[p_id][s_id][ss_id] << c
73
- token = ''
74
- end
75
- end
76
- when PUN then begin
77
- case token
78
- when EMPTY
79
- else
80
- tree[p_id][s_id][ss_id] << token
81
- tree[p_id][s_id][ss_id] << c
82
- token = ''
83
- s_id += 1
84
- ss_id = 0
85
- end
86
- end
87
- when SPUN then begin
88
- case token
89
- when EMPTY
90
- else
91
- tree[p_id][s_id][ss_id] << token
92
- tree[p_id][s_id][ss_id] << c
93
- token = ''
94
- ss_id += 1
95
- end
96
- end
97
- when RU_LEX then begin
98
- case token
99
- when EOL then begin
100
- tree[p_id][s_id][ss_id] << ' '
101
- token = c
102
- end
103
- else
104
- token << c
105
- end
106
- end
107
- when EN_LEX then begin
108
- case token
109
- when EOL then begin
110
- tree[p_id][s_id][ss_id] << ' '
111
- token = c
112
- end
113
- else
114
- token << c
115
- end
116
- end
117
- when DIG then begin
118
- case token
119
- when EOL then begin
120
- tree[p_id][s_id][ss_id] << ' '
121
- token = c
122
- end
123
- else
124
- token << c
125
- end
126
- end
127
- when DIL then begin
128
- case token
129
- when EOL then begin
130
- tree[p_id][s_id][ss_id] << token
131
- token = c
132
- end
133
- else
134
- token << c
135
- end
136
- end
137
- end
138
- end
139
- tree[p_id][s_id][ss_id] << token
140
- tree.delete(nil)
141
- tree.to_a
142
- end
143
- end
10
+ require 'greeb/parser'
144
11
  end
@@ -0,0 +1,176 @@
1
+ # encoding: utf-8
2
+
3
+ require 'meta_array'
4
+ require 'enumerable'
5
+
6
+ # Graphematical Parser of the Greeb.
7
+ # Use it with love.
8
+ #
9
+ class Greeb::Parser
10
+ # Russian lexeme (i.e.: "хуй").
11
+ #
12
+ RUSSIAN_LEXEME = /^[А-Яа-яЁё]+$/u
13
+
14
+ # English lexeme (i.e.: "foo").
15
+ #
16
+ ENGLISH_LEXEME = /^[A-Za-z]+$/u
17
+
18
+ # End of Line sequence (i.e.: "\n").
19
+ #
20
+ END_OF_LINE = /^\n+$/u
21
+
22
+ # In-subsentence separator (i.e.: "*" or "\").
23
+ #
24
+ SEPARATOR = /^[*=_\/\\ ]$/u
25
+
26
+ # Punctuation character (i.e.: "." or "!").
27
+ #
28
+ PUNCTUATION = /^(\.|\!|\?)$/u
29
+
30
+ # In-sentence punctuation character (i.e.: "," or "-").
31
+ #
32
+ SENTENCE_PUNCTUATION = /^(\,|\[|\]|\(|\)|\-|:|;)$/u
33
+
34
+ # Digit (i.e.: "1337").
35
+ #
36
+ DIGIT = /^[0-9]+$/u
37
+
38
+ # Digit-Letter complex (i.e.: "0xDEADBEEF").
39
+ #
40
+ DIGIT_LETTER = /^[А-Яа-яA-Za-z0-9Ёё]+$/u
41
+
42
+ # Empty string (i.e.: "").
43
+ #
44
+ EMPTY = ''
45
+
46
+ attr_accessor :text
47
+ private :text=
48
+
49
+ # Create a new instance of Greeb::Parser.
50
+ #
51
+ # ==== Parameters
52
+ # text<String>:: Source text.
53
+ #
54
+ def initialize(text)
55
+ self.text = text
56
+ end
57
+
58
+ # Perform the text parsing.
59
+ #
60
+ # ==== Returns
61
+ # Array:: Tree of Graphematical Analysis of text.
62
+ #
63
+ def parse
64
+ return @tree if @tree
65
+
66
+ # parse tree
67
+ tree = MetaArray.new
68
+
69
+ # paragraph, sentence, subsentence
70
+ p_id, s_id, ss_id = 0, 0, 0
71
+
72
+ # current token
73
+ token = ''
74
+
75
+ # run FSM
76
+ text.each_char do |c|
77
+ case c
78
+ when END_OF_LINE then begin
79
+ case token
80
+ when EMPTY then token << c
81
+ when END_OF_LINE then begin
82
+ token = ''
83
+ p_id += 1
84
+ s_id = 0
85
+ ss_id = 0
86
+ end
87
+ else
88
+ tree[p_id][s_id][ss_id] << token
89
+ token = c
90
+ end
91
+ end
92
+ when SEPARATOR then begin
93
+ case token
94
+ when EMPTY
95
+ else
96
+ tree[p_id][s_id][ss_id] << token
97
+ while tree[p_id][s_id][ss_id].last == c
98
+ tree[p_id][s_id][ss_id].pop
99
+ end
100
+ tree[p_id][s_id][ss_id] << c
101
+ token = ''
102
+ end
103
+ end
104
+ when PUNCTUATION then begin
105
+ case token
106
+ when EMPTY
107
+ else
108
+ tree[p_id][s_id][ss_id] << token
109
+ tree[p_id][s_id][ss_id] << c
110
+ token = ''
111
+ s_id += 1
112
+ ss_id = 0
113
+ end
114
+ end
115
+ when SENTENCE_PUNCTUATION then begin
116
+ case token
117
+ when EMPTY
118
+ else
119
+ tree[p_id][s_id][ss_id] << token
120
+ tree[p_id][s_id][ss_id] << c
121
+ token = ''
122
+ ss_id += 1
123
+ end
124
+ end
125
+ when RUSSIAN_LEXEME then begin
126
+ case token
127
+ when END_OF_LINE then begin
128
+ tree[p_id][s_id][ss_id] << ' '
129
+ token = c
130
+ end
131
+ else
132
+ token << c
133
+ end
134
+ end
135
+ when ENGLISH_LEXEME then begin
136
+ case token
137
+ when END_OF_LINE then begin
138
+ tree[p_id][s_id][ss_id] << ' '
139
+ token = c
140
+ end
141
+ else
142
+ token << c
143
+ end
144
+ end
145
+ when DIGIT then begin
146
+ case token
147
+ when END_OF_LINE then begin
148
+ tree[p_id][s_id][ss_id] << ' '
149
+ token = c
150
+ end
151
+ else
152
+ token << c
153
+ end
154
+ end
155
+ when DIGIT_LETTER then begin
156
+ case token
157
+ when END_OF_LINE then begin
158
+ tree[p_id][s_id][ss_id] << token
159
+ token = c
160
+ end
161
+ else
162
+ token << c
163
+ end
164
+ end
165
+ end
166
+ end
167
+
168
+ unless token.empty?
169
+ tree[p_id][s_id][ss_id] << token
170
+ end
171
+
172
+ tree.delete(nil)
173
+
174
+ @tree = tree.to_a
175
+ end
176
+ end
@@ -1,5 +1,8 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # MetaArray is an Array, which creates subarrays
4
+ # on non-existent elements.
5
+ #
3
6
  class MetaArray < Array
4
7
  def [] id
5
8
  super(id) or begin
@@ -0,0 +1,63 @@
1
+ # encoding: utf-8
2
+
3
+ require File.expand_path('../spec_helper.rb', __FILE__)
4
+
5
+ describe Greeb::Parser do
6
+ it 'should parse very simple strings' do
7
+ 'буба сука дебил'.should be_parsed_as([
8
+ [
9
+ [ [ 'буба', ' ', 'сука', ' ', 'дебил' ] ]
10
+ ]
11
+ ])
12
+ end
13
+
14
+ it 'should parse one sentence with subsentences' do
15
+ 'буба, сука, дебил'.should be_parsed_as([
16
+ [
17
+ [
18
+ [ 'буба', ',' ],
19
+ [ 'сука', ',' ],
20
+ [ 'дебил' ]
21
+ ]
22
+ ]
23
+ ])
24
+ end
25
+
26
+ it 'should parse two simple paragraphs' do
27
+ "буба сука дебил\n\nточно!".should be_parsed_as([
28
+ [
29
+ [ [ 'буба', ' ', 'сука', ' ', 'дебил' ] ]
30
+ ],
31
+ [
32
+ [ [ 'точно', '!' ] ]
33
+ ]
34
+ ])
35
+ end
36
+
37
+ it 'should parse two sentences in paragraph' do
38
+ "буба молодец? буба умница.".should be_parsed_as([
39
+ [
40
+ [ [ 'буба', ' ', 'молодец', '?' ] ],
41
+ [ [ 'буба', ' ', 'умница', '.' ] ]
42
+ ]
43
+ ])
44
+ end
45
+
46
+ it 'should parse sentences with floating point values' do
47
+ 'буба не считает Пи равной 3.14'.should be_parsed_as([
48
+ [
49
+ [ [ 'буба', ' ', 'не', ' ', 'считает', ' ',
50
+ 'Пи', ' ', 'равной', ' ', '3.14' ] ]
51
+ ]
52
+ ])
53
+ end
54
+
55
+ it 'should parse sentences with floating "dot" values' do
56
+ 'буба не считает Пи равной 3,14'.should be_parsed_as([
57
+ [
58
+ [ [ 'буба', ' ', 'не', ' ', 'считает', ' ',
59
+ 'Пи', ' ', 'равной', ' ', '3,14' ] ]
60
+ ]
61
+ ])
62
+ end
63
+ end
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
+ require File.expand_path('../../lib/greeb', __FILE__)
4
+
5
+ RSpec.configure do |c|
6
+ c.mock_with :rspec
7
+ end
8
+
9
+ RSpec::Matchers.define :be_parsed_as do |expected|
10
+ match do |actual|
11
+ tree = Greeb::Parser.new(actual).parse
12
+ tree == expected
13
+ end
14
+ end
metadata CHANGED
@@ -1,18 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: greeb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
5
- prerelease: !!null
4
+ version: 0.0.2
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dmitry A. Ustalov
9
- autorequire: !!null
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-02-06 00:00:00.000000000 +05:00
13
- default_executable: !!null
14
- dependencies: []
15
- description: Greeb is a Graphematical Analyzer, written in Ruby.
12
+ date: 2011-02-20 00:00:00.000000000 +05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &81165430 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 2.4.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *81165430
26
+ description: Greeb is awesome Graphematical Analyzer, written in Ruby.
16
27
  email:
17
28
  - dmitry@eveel.ru
18
29
  executables: []
@@ -21,18 +32,21 @@ extra_rdoc_files: []
21
32
  files:
22
33
  - .gitignore
23
34
  - Gemfile
35
+ - Gemfile.lock
24
36
  - README
25
37
  - Rakefile
26
38
  - greeb-test.rb
27
39
  - greeb.gemspec
28
40
  - lib/enumerable.rb
29
41
  - lib/greeb.rb
30
- - lib/greeb/version.rb
42
+ - lib/greeb/parser.rb
31
43
  - lib/meta_array.rb
44
+ - spec/parser_spec.rb
45
+ - spec/spec_helper.rb
32
46
  has_rdoc: true
33
47
  homepage: https://github.com/eveel/greeb
34
48
  licenses: []
35
- post_install_message: !!null
49
+ post_install_message:
36
50
  rdoc_options: []
37
51
  require_paths:
38
52
  - lib
@@ -50,8 +64,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
64
  version: '0'
51
65
  requirements: []
52
66
  rubyforge_project: greeb
53
- rubygems_version: 1.5.0
54
- signing_key: !!null
67
+ rubygems_version: 1.5.2
68
+ signing_key:
55
69
  specification_version: 3
56
70
  summary: Greeb is a Graphematical Analyzer.
57
- test_files: []
71
+ test_files:
72
+ - spec/parser_spec.rb
73
+ - spec/spec_helper.rb
@@ -1,5 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Greeb
4
- VERSION = "0.0.1"
5
- end