greeb 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ greeb (0.0.2)
5
+ rspec (~> 2.4.0)
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ diff-lcs (1.1.2)
11
+ rspec (2.4.0)
12
+ rspec-core (~> 2.4.0)
13
+ rspec-expectations (~> 2.4.0)
14
+ rspec-mocks (~> 2.4.0)
15
+ rspec-core (2.4.0)
16
+ rspec-expectations (2.4.0)
17
+ diff-lcs (~> 1.1.2)
18
+ rspec-mocks (2.4.0)
19
+
20
+ PLATFORMS
21
+ ruby
22
+
23
+ DEPENDENCIES
24
+ greeb!
data/Rakefile CHANGED
@@ -2,3 +2,11 @@
2
2
 
3
3
  require 'bundler'
4
4
  Bundler::GemHelper.install_tasks
5
+
6
+ require 'rspec/core/rake_task'
7
+ desc 'Run all examples'
8
+ RSpec::Core::RakeTask.new(:spec) do |t|
9
+ t.rspec_opts = %w[--color]
10
+ end
11
+
12
+ task :default => :spec
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  $:.push File.expand_path('../lib', __FILE__)
4
- require 'greeb/version'
4
+ require 'greeb'
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = 'greeb'
@@ -11,11 +11,13 @@ Gem::Specification.new do |s|
11
11
  s.email = [ 'dmitry@eveel.ru' ]
12
12
  s.homepage = 'https://github.com/eveel/greeb'
13
13
  s.summary = 'Greeb is a Graphematical Analyzer.'
14
- s.description = 'Greeb is a Graphematical Analyzer, ' \
14
+ s.description = 'Greeb is awesome Graphematical Analyzer, ' \
15
15
  'written in Ruby.'
16
16
 
17
17
  s.rubyforge_project = 'greeb'
18
18
 
19
+ s.add_dependency 'rspec', '~> 2.4.0'
20
+
19
21
  s.files = `git ls-files`.split("\n")
20
22
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
21
23
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
@@ -1,7 +1,9 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # Enumerable module additions.
4
+ #
3
5
  module Enumerable
4
- def collect_with_index(i = -1)
6
+ def collect_with_index(i = -1) # :nodoc:
5
7
  collect { |e| yield(e, i += 1) }
6
8
  end
7
9
  alias map_with_index collect_with_index
@@ -1,144 +1,11 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'meta_array'
4
- require 'enumerable'
5
-
3
+ # Greeb is an awesome Graphematical Analyzer.
4
+ #
6
5
  module Greeb
7
- RU_LEX = /^[А-Яа-я]+$/u
8
- EN_LEX = /^[A-Za-z]+$/u
9
- EOL = /^\n+$/u
10
- SEP = /^[*=_\/\\ ]$/u
11
- PUN = /^(\.|\!|\?)$/u
12
- SPUN = /^(\,|\[|\]|\(|\)|\-|:|;)$/u
13
- DIG = /^[0-9]+$/u
14
- DIL = /^[А-Яа-яA-Za-z0-9]+$/u
15
- EMPTY = ''
16
-
17
- class Parser
18
- attr_accessor :origin
19
- private :origin=
20
-
21
- attr_writer :tree
22
- private :tree=
23
-
24
- def initialize(origin)
25
- self.origin = origin
26
- end
27
-
28
- def tree
29
- @tree ||= parse(origin)
30
- end
31
-
32
- private
33
- def parse(origin) # :nodoc:
34
- tree = MetaArray.new
35
-
36
- # paragraph
37
- p_id = 0
38
-
39
- # sentence
40
- s_id = 0
41
-
42
- # subsentence
43
- ss_id = 0
44
-
45
- token = ''
6
+ # Version of the Greeb.
7
+ #
8
+ VERSION = "0.0.2"
46
9
 
47
- origin.each_char do |c|
48
- puts "[#{token.inspect}] ← #{c.inspect}"
49
- case c
50
- when EOL then begin
51
- case token
52
- when EMPTY then token << c
53
- when EOL then begin
54
- token = ''
55
- p_id += 1
56
- s_id = 0
57
- ss_id = 0
58
- end
59
- else
60
- tree[p_id][s_id][ss_id] << token
61
- token = c
62
- end
63
- end
64
- when SEP then begin
65
- case token
66
- when EMPTY
67
- else
68
- tree[p_id][s_id][ss_id] << token
69
- while tree[p_id][s_id][ss_id].last == c
70
- tree[p_id][s_id][ss_id].pop
71
- end
72
- tree[p_id][s_id][ss_id] << c
73
- token = ''
74
- end
75
- end
76
- when PUN then begin
77
- case token
78
- when EMPTY
79
- else
80
- tree[p_id][s_id][ss_id] << token
81
- tree[p_id][s_id][ss_id] << c
82
- token = ''
83
- s_id += 1
84
- ss_id = 0
85
- end
86
- end
87
- when SPUN then begin
88
- case token
89
- when EMPTY
90
- else
91
- tree[p_id][s_id][ss_id] << token
92
- tree[p_id][s_id][ss_id] << c
93
- token = ''
94
- ss_id += 1
95
- end
96
- end
97
- when RU_LEX then begin
98
- case token
99
- when EOL then begin
100
- tree[p_id][s_id][ss_id] << ' '
101
- token = c
102
- end
103
- else
104
- token << c
105
- end
106
- end
107
- when EN_LEX then begin
108
- case token
109
- when EOL then begin
110
- tree[p_id][s_id][ss_id] << ' '
111
- token = c
112
- end
113
- else
114
- token << c
115
- end
116
- end
117
- when DIG then begin
118
- case token
119
- when EOL then begin
120
- tree[p_id][s_id][ss_id] << ' '
121
- token = c
122
- end
123
- else
124
- token << c
125
- end
126
- end
127
- when DIL then begin
128
- case token
129
- when EOL then begin
130
- tree[p_id][s_id][ss_id] << token
131
- token = c
132
- end
133
- else
134
- token << c
135
- end
136
- end
137
- end
138
- end
139
- tree[p_id][s_id][ss_id] << token
140
- tree.delete(nil)
141
- tree.to_a
142
- end
143
- end
10
+ require 'greeb/parser'
144
11
  end
@@ -0,0 +1,176 @@
1
+ # encoding: utf-8
2
+
3
+ require 'meta_array'
4
+ require 'enumerable'
5
+
6
+ # Graphematical Parser of the Greeb.
7
+ # Use it with love.
8
+ #
9
+ class Greeb::Parser
10
+ # Russian lexeme (i.e.: "хуй").
11
+ #
12
+ RUSSIAN_LEXEME = /^[А-Яа-яЁё]+$/u
13
+
14
+ # English lexeme (i.e.: "foo").
15
+ #
16
+ ENGLISH_LEXEME = /^[A-Za-z]+$/u
17
+
18
+ # End of Line sequence (i.e.: "\n").
19
+ #
20
+ END_OF_LINE = /^\n+$/u
21
+
22
+ # In-subsentence separator (i.e.: "*" or "\").
23
+ #
24
+ SEPARATOR = /^[*=_\/\\ ]$/u
25
+
26
+ # Punctuation character (i.e.: "." or "!").
27
+ #
28
+ PUNCTUATION = /^(\.|\!|\?)$/u
29
+
30
+ # In-sentence punctuation character (i.e.: "," or "-").
31
+ #
32
+ SENTENCE_PUNCTUATION = /^(\,|\[|\]|\(|\)|\-|:|;)$/u
33
+
34
+ # Digit (i.e.: "1337").
35
+ #
36
+ DIGIT = /^[0-9]+$/u
37
+
38
+ # Digit-Letter complex (i.e.: "0xDEADBEEF").
39
+ #
40
+ DIGIT_LETTER = /^[А-Яа-яA-Za-z0-9Ёё]+$/u
41
+
42
+ # Empty string (i.e.: "").
43
+ #
44
+ EMPTY = ''
45
+
46
+ attr_accessor :text
47
+ private :text=
48
+
49
+ # Create a new instance of Greeb::Parser.
50
+ #
51
+ # ==== Parameters
52
+ # text<String>:: Source text.
53
+ #
54
+ def initialize(text)
55
+ self.text = text
56
+ end
57
+
58
+ # Perform the text parsing.
59
+ #
60
+ # ==== Returns
61
+ # Array:: Tree of Graphematical Analysis of text.
62
+ #
63
+ def parse
64
+ return @tree if @tree
65
+
66
+ # parse tree
67
+ tree = MetaArray.new
68
+
69
+ # paragraph, sentence, subsentence
70
+ p_id, s_id, ss_id = 0, 0, 0
71
+
72
+ # current token
73
+ token = ''
74
+
75
+ # run FSM
76
+ text.each_char do |c|
77
+ case c
78
+ when END_OF_LINE then begin
79
+ case token
80
+ when EMPTY then token << c
81
+ when END_OF_LINE then begin
82
+ token = ''
83
+ p_id += 1
84
+ s_id = 0
85
+ ss_id = 0
86
+ end
87
+ else
88
+ tree[p_id][s_id][ss_id] << token
89
+ token = c
90
+ end
91
+ end
92
+ when SEPARATOR then begin
93
+ case token
94
+ when EMPTY
95
+ else
96
+ tree[p_id][s_id][ss_id] << token
97
+ while tree[p_id][s_id][ss_id].last == c
98
+ tree[p_id][s_id][ss_id].pop
99
+ end
100
+ tree[p_id][s_id][ss_id] << c
101
+ token = ''
102
+ end
103
+ end
104
+ when PUNCTUATION then begin
105
+ case token
106
+ when EMPTY
107
+ else
108
+ tree[p_id][s_id][ss_id] << token
109
+ tree[p_id][s_id][ss_id] << c
110
+ token = ''
111
+ s_id += 1
112
+ ss_id = 0
113
+ end
114
+ end
115
+ when SENTENCE_PUNCTUATION then begin
116
+ case token
117
+ when EMPTY
118
+ else
119
+ tree[p_id][s_id][ss_id] << token
120
+ tree[p_id][s_id][ss_id] << c
121
+ token = ''
122
+ ss_id += 1
123
+ end
124
+ end
125
+ when RUSSIAN_LEXEME then begin
126
+ case token
127
+ when END_OF_LINE then begin
128
+ tree[p_id][s_id][ss_id] << ' '
129
+ token = c
130
+ end
131
+ else
132
+ token << c
133
+ end
134
+ end
135
+ when ENGLISH_LEXEME then begin
136
+ case token
137
+ when END_OF_LINE then begin
138
+ tree[p_id][s_id][ss_id] << ' '
139
+ token = c
140
+ end
141
+ else
142
+ token << c
143
+ end
144
+ end
145
+ when DIGIT then begin
146
+ case token
147
+ when END_OF_LINE then begin
148
+ tree[p_id][s_id][ss_id] << ' '
149
+ token = c
150
+ end
151
+ else
152
+ token << c
153
+ end
154
+ end
155
+ when DIGIT_LETTER then begin
156
+ case token
157
+ when END_OF_LINE then begin
158
+ tree[p_id][s_id][ss_id] << token
159
+ token = c
160
+ end
161
+ else
162
+ token << c
163
+ end
164
+ end
165
+ end
166
+ end
167
+
168
+ unless token.empty?
169
+ tree[p_id][s_id][ss_id] << token
170
+ end
171
+
172
+ tree.delete(nil)
173
+
174
+ @tree = tree.to_a
175
+ end
176
+ end
@@ -1,5 +1,8 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # MetaArray is an Array, which creates subarrays
4
+ # on non-existent elements.
5
+ #
3
6
  class MetaArray < Array
4
7
  def [] id
5
8
  super(id) or begin
@@ -0,0 +1,63 @@
1
+ # encoding: utf-8
2
+
3
+ require File.expand_path('../spec_helper.rb', __FILE__)
4
+
5
+ describe Greeb::Parser do
6
+ it 'should parse very simple strings' do
7
+ 'буба сука дебил'.should be_parsed_as([
8
+ [
9
+ [ [ 'буба', ' ', 'сука', ' ', 'дебил' ] ]
10
+ ]
11
+ ])
12
+ end
13
+
14
+ it 'should parse one sentence with subsentences' do
15
+ 'буба, сука, дебил'.should be_parsed_as([
16
+ [
17
+ [
18
+ [ 'буба', ',' ],
19
+ [ 'сука', ',' ],
20
+ [ 'дебил' ]
21
+ ]
22
+ ]
23
+ ])
24
+ end
25
+
26
+ it 'should parse two simple paragraphs' do
27
+ "буба сука дебил\n\nточно!".should be_parsed_as([
28
+ [
29
+ [ [ 'буба', ' ', 'сука', ' ', 'дебил' ] ]
30
+ ],
31
+ [
32
+ [ [ 'точно', '!' ] ]
33
+ ]
34
+ ])
35
+ end
36
+
37
+ it 'should parse two sentences in paragraph' do
38
+ "буба молодец? буба умница.".should be_parsed_as([
39
+ [
40
+ [ [ 'буба', ' ', 'молодец', '?' ] ],
41
+ [ [ 'буба', ' ', 'умница', '.' ] ]
42
+ ]
43
+ ])
44
+ end
45
+
46
+ it 'should parse sentences with floating point values' do
47
+ 'буба не считает Пи равной 3.14'.should be_parsed_as([
48
+ [
49
+ [ [ 'буба', ' ', 'не', ' ', 'считает', ' ',
50
+ 'Пи', ' ', 'равной', ' ', '3.14' ] ]
51
+ ]
52
+ ])
53
+ end
54
+
55
+ it 'should parse sentences with floating "dot" values' do
56
+ 'буба не считает Пи равной 3,14'.should be_parsed_as([
57
+ [
58
+ [ [ 'буба', ' ', 'не', ' ', 'считает', ' ',
59
+ 'Пи', ' ', 'равной', ' ', '3,14' ] ]
60
+ ]
61
+ ])
62
+ end
63
+ end
@@ -0,0 +1,14 @@
1
+ # encoding: utf-8
2
+
3
+ require File.expand_path('../../lib/greeb', __FILE__)
4
+
5
+ RSpec.configure do |c|
6
+ c.mock_with :rspec
7
+ end
8
+
9
+ RSpec::Matchers.define :be_parsed_as do |expected|
10
+ match do |actual|
11
+ tree = Greeb::Parser.new(actual).parse
12
+ tree == expected
13
+ end
14
+ end
metadata CHANGED
@@ -1,18 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: greeb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
5
- prerelease: !!null
4
+ version: 0.0.2
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dmitry A. Ustalov
9
- autorequire: !!null
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-02-06 00:00:00.000000000 +05:00
13
- default_executable: !!null
14
- dependencies: []
15
- description: Greeb is a Graphematical Analyzer, written in Ruby.
12
+ date: 2011-02-20 00:00:00.000000000 +05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &81165430 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 2.4.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *81165430
26
+ description: Greeb is awesome Graphematical Analyzer, written in Ruby.
16
27
  email:
17
28
  - dmitry@eveel.ru
18
29
  executables: []
@@ -21,18 +32,21 @@ extra_rdoc_files: []
21
32
  files:
22
33
  - .gitignore
23
34
  - Gemfile
35
+ - Gemfile.lock
24
36
  - README
25
37
  - Rakefile
26
38
  - greeb-test.rb
27
39
  - greeb.gemspec
28
40
  - lib/enumerable.rb
29
41
  - lib/greeb.rb
30
- - lib/greeb/version.rb
42
+ - lib/greeb/parser.rb
31
43
  - lib/meta_array.rb
44
+ - spec/parser_spec.rb
45
+ - spec/spec_helper.rb
32
46
  has_rdoc: true
33
47
  homepage: https://github.com/eveel/greeb
34
48
  licenses: []
35
- post_install_message: !!null
49
+ post_install_message:
36
50
  rdoc_options: []
37
51
  require_paths:
38
52
  - lib
@@ -50,8 +64,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
64
  version: '0'
51
65
  requirements: []
52
66
  rubyforge_project: greeb
53
- rubygems_version: 1.5.0
54
- signing_key: !!null
67
+ rubygems_version: 1.5.2
68
+ signing_key:
55
69
  specification_version: 3
56
70
  summary: Greeb is a Graphematical Analyzer.
57
- test_files: []
71
+ test_files:
72
+ - spec/parser_spec.rb
73
+ - spec/spec_helper.rb
@@ -1,5 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Greeb
4
- VERSION = "0.0.1"
5
- end