greeb 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## RUBINIUS
17
+ *.rbc
18
+
19
+ ## NETBEANS
20
+ nbproject
21
+
22
+ ## REDCAR
23
+ .redcar
24
+
25
+ ## RVM
26
+ .rvmrc
27
+
28
+ ## BUNDLER
29
+ .bundle
30
+
31
+ ## PROJECT::GENERAL
32
+ coverage
33
+ pkg
34
+
35
+ ## PROJECT::SPECIFIC
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems from the official index over HTTPS rather than plain HTTP so
# that downloads cannot be tampered with in transit.
source 'https://rubygems.org'

# Runtime and development dependencies are declared in the gemspec.
gemspec
data/README ADDED
File without changes
@@ -0,0 +1,4 @@
1
# encoding: utf-8

# Load Bundler, then let its gem helper define the gem-related Rake tasks
# for this project.
require 'bundler'

Bundler::GemHelper.install_tasks
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'rubygems'
5
+ require 'graphviz'
6
+
7
+ $:.unshift('./lib')
8
+ require 'greeb'
9
+
10
+ origin = <<-END
11
+ - Сынок, чего это от тебя зигами пахнет,
12
+ опять на Манежную площадь ходил?
13
+
14
+ - Нет мама, я в метро ехал, там назиговано было!!
15
+
16
+
17
+
18
+ Четырнадцать, восемьдесять восемь: 14/88.
19
+ Вот так блять
20
+ END
21
+ origin.chomp!
22
+
23
# Returns the name of the first Greeb token class whose pattern matches
# +token+.
#
# Patterns are tried in the order listed below, so a token that satisfies
# several of them is reported under the earliest one. '?!' is returned when
# no pattern matches.
def identify(token)
  classes = [
    [ Greeb::RU_LEX, 'RU_LEX' ],
    [ Greeb::EN_LEX, 'EN_LEX' ],
    [ Greeb::EOL,    'EOL'    ],
    [ Greeb::SEP,    'SEP'    ],
    [ Greeb::PUN,    'PUN'    ],
    [ Greeb::SPUN,   'SPUN'   ],
    [ Greeb::DIG,    'DIG'    ],
    [ Greeb::DIL,    'DIL'    ]
  ]

  # `===` is the same comparison a `case`/`when` performs.
  _, name = classes.find { |pattern, _| pattern === token }
  name || '?!'
end
37
+
38
# Parse the sample text into nested paragraphs / sentences / subsentences /
# tokens.
greeb = Greeb::Parser.new(origin)
text = greeb.tree

g = GraphViz.new('graphematics', 'type' => 'graph')

# Default attributes applied to every node.
{
  :color     => '#ddaa66',
  :style     => 'filled',
  :shape     => 'box',
  :penwidth  => '1',
  :fontname  => 'PT Sans',
  :fontsize  => '8',
  :fillcolor => '#ffeecc',
  :fontcolor => '#775500',
  :margin    => '0.0'
}.each { |attribute, value| g.node[attribute] = value }

# Default attributes applied to every edge.
{
  :color     => '#999999',
  :weight    => '1',
  :fontname  => 'PT Sans',
  :fontcolor => '#444444',
  :fontsize  => '6',
  :dir       => 'forward',
  :arrowsize => '0.5'
}.each { |attribute, value| g.edge[attribute] = value }

# Terminal nodes marking the beginning and the end of the text; they are
# drawn as unfilled ellipses.
bid = 'begin'
eid = 'end'
[ [ bid, "Начало\nтекста" ], [ eid, "Конец\nтекста" ] ].each do |id, caption|
  terminal = g.add_node(id)
  terminal.label = caption
  terminal.shape = 'ellipse'
  terminal.style = ''
end
74
+
75
# Attach a unique graph id and the positional index to every level of the
# parsed text. Each level becomes a triple [ id, children, index ]; at the
# token level the triple is [ id, token, index ].
#
# Only core Enumerable methods are used, so this does not depend on the
# map_with_index extension added to Enumerable by the library.
tree = text.each_with_index.map do |paragraph, i|
  pid = "p#{i}"
  sentences = paragraph.each_with_index.map do |sentence, j|
    sid = "#{pid}s#{j}"
    subsentences = sentence.each_with_index.map do |subsentence, k|
      ssid = "#{sid}ss#{k}"
      # Single-space tokens are not drawn. The index is taken before
      # filtering, so remaining tokens keep their original positions.
      visible = subsentence.each_with_index.reject { |token, _| ' ' == token }
      tokens = visible.map { |token, l| [ "#{ssid}t#{l}", token, l ] }
      [ ssid, tokens, k ]
    end
    [ sid, subsentences, j ]
  end
  [ pid, sentences, i ]
end
92
+
93
# Adds (or fetches) the node +id+ and renders it as an ellipse with the
# given caption.
add_ellipse = lambda do |id, caption|
  node = g.add_node(id)
  node.label = caption
  node.shape = 'ellipse'
  node
end

# Connects two token nodes with a dashed edge of the given weight.
link_tokens = lambda do |from, to, weight|
  edge = g.add_edge(from, to)
  edge.weight = weight
  edge.style = 'dashed'
  edge
end

tree.each do |pid, paragraph, i|
  add_ellipse.call(pid, "Абзац\n№#{i + 1}")
  g.add_edge(bid, pid)

  paragraph.each do |sid, sentence, j|
    add_ellipse.call(sid, "Предложение\n№#{j + 1}")
    g.add_edge(pid, sid)

    sentence.each do |ssid, subsentence, k|
      add_ellipse.call(ssid, "Подпредложение\n№#{k + 1}")
      g.add_edge(sid, ssid)

      # Every token hangs off its subsentence (the edge is labelled with the
      # token class) and is also linked to the end-of-text node.
      subsentence.each do |tid, token, _|
        g.add_node(tid).label = token
        g.add_edge(ssid, tid).label = identify(token)
        g.add_edge(tid, eid)
      end

      # Chain neighbouring tokens inside the subsentence.
      subsentence.each_cons(2) do |(left_id, *), (right_id, *)|
        link_tokens.call(left_id, right_id, 0.25)
      end
    end

    # Chain the last token of each subsentence to the first token of the
    # following one.
    sentence.each_cons(2) do |(_, left_tokens, _), (_, right_tokens, _)|
      left_id, _ = left_tokens.last
      right_id, _ = right_tokens.first
      link_tokens.call(left_id, right_id, 0.5)
    end
  end
end

g.output(:output => 'png', :file => 'graph.png')
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+ $:.push File.expand_path('../lib', __FILE__)
4
+ require 'greeb/version'
5
+
6
# Gem specification for greeb. The version comes from Greeb::VERSION and the
# file lists are taken from the files tracked by git.
Gem::Specification.new do |s|
  s.name        = 'greeb'
  s.version     = Greeb::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = [ 'Dmitry A. Ustalov' ]
  s.email       = [ 'dmitry@eveel.ru' ]
  s.homepage    = 'https://github.com/eveel/greeb'
  s.summary     = 'Greeb is a Graphematical Analyzer.'
  s.description = 'Greeb is a Graphematical Analyzer, ' \
                  'written in Ruby.'

  # NOTE(review): this setter is deprecated in newer RubyGems releases and
  # may be absent altogether; only call it where it exists so the gemspec
  # keeps loading. Confirm against the targeted RubyGems version.
  s.rubyforge_project = 'greeb' if s.respond_to?(:rubyforge_project=)

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = [ 'lib' ]
end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+
3
module Enumerable
  # Maps every element together with a running index.
  #
  # The block receives +(element, index)+ and its results are collected into
  # an Array. +i+ is one less than the index handed to the first element, so
  # the default of -1 numbers the elements 0, 1, 2, ...
  #
  # Without a block an Enumerator is returned instead of raising
  # LocalJumpError.
  def collect_with_index(i = -1)
    return enum_for(:collect_with_index, i) unless block_given?

    index = i
    collect { |element| yield(element, index += 1) }
  end
  alias map_with_index collect_with_index
end
@@ -0,0 +1,144 @@
1
+ # encoding: utf-8
2
+
3
+ require 'meta_array'
4
+ require 'enumerable'
5
+
6
# Graphematical analysis: splitting raw text into paragraphs, sentences,
# subsentences and tokens.
module Greeb
  # Token classes. Each pattern is anchored with \A and \z so that it has to
  # match the whole string, not just one line of it.
  RU_LEX = /\A[А-Яа-яЁё]+\z/u                   # Russian word (Ё/ё lie outside А-я)
  EN_LEX = /\A[A-Za-z]+\z/u                     # English word
  EOL    = /\A\n+\z/u                           # one or more line breaks
  SEP    = /\A[*=_\/\\ ]\z/u                    # single separator character
  PUN    = /\A(\.|\!|\?)\z/u                    # sentence-ending punctuation
  SPUN   = /\A(\,|\[|\]|\(|\)|\-|:|;)\z/u       # subsentence-ending punctuation
  DIG    = /\A[0-9]+\z/u                        # number
  DIL    = /\A[А-Яа-яЁёA-Za-z0-9]+\z/u          # mixed letters and digits
  EMPTY  = ''

  # Lazily parses a text into a nested array indexed as
  # tree[paragraph][sentence][subsentence] -> array of token strings.
  class Parser
    attr_accessor :origin
    private :origin=

    attr_writer :tree
    private :tree=

    # origin:: the text to analyse (must respond to +each_char+).
    def initialize(origin)
      self.origin = origin
    end

    # Returns the parsed tree; the text is parsed on first access only.
    def tree
      @tree ||= parse(origin)
    end

    private

    # Walks +origin+ one character at a time, accumulating the current token
    # and flushing it into the tree whenever a boundary character is seen.
    #
    # * a line break after a token flushes it; a second consecutive line
    #   break starts a new paragraph; a pending line break followed by a
    #   letter or digit is stored as a single ' ' token;
    # * a separator flushes the token followed by the separator;
    # * PUN flushes the token and the mark, then starts a new sentence;
    # * SPUN flushes the token and the mark, then starts a new subsentence;
    # * separators and punctuation arriving while no token is pending are
    #   dropped, as are characters that match none of the classes.
    #
    # Nothing is written to standard output. Returns a plain Array.
    def parse(origin) # :nodoc:
      tree = MetaArray.new
      p_id = s_id = ss_id = 0
      token = ''.dup

      # The token list currently being filled; created on first access.
      bucket = lambda { tree[p_id][s_id][ss_id] }

      origin.each_char do |c|
        case c
        when EOL
          if token.empty?
            token << c
          elsif token =~ EOL
            token = ''.dup
            p_id += 1
            s_id = 0
            ss_id = 0
          else
            bucket.call << token
            token = c
          end
        when SEP
          next if token.empty?
          bucket.call << token << c
          token = ''.dup
        when PUN
          next if token.empty?
          bucket.call << token << c
          token = ''.dup
          s_id += 1
          ss_id = 0
        when SPUN
          next if token.empty?
          bucket.call << token << c
          token = ''.dup
          ss_id += 1
        when RU_LEX, EN_LEX, DIG
          if token =~ EOL
            bucket.call << ' '
            token = c
          else
            token << c
          end
        end
      end

      # Flush what is left, but never store an empty token: doing so would
      # fabricate an empty sentence after a trailing punctuation mark.
      bucket.call << token unless token.empty?

      # Runs of blank lines leave unused paragraph slots behind.
      tree.delete(nil)
      tree.to_a
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+
3
module Greeb
  # Version of the gem. Frozen so the constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
+
3
# An Array whose missing elements come into existence on first access:
# reading an Integer index that holds +nil+ stores a new, empty MetaArray
# there and returns it, so nested structures can be filled with
# <tt>array[i][j][k] << value</tt> without initialising each level.
class MetaArray < Array
  # Element reference with the same call forms as Array#[].
  #
  # Auto-creation only happens for a single Integer index whose value is
  # +nil+. Slices (<tt>[start, length]</tt>, ranges) and stored +false+
  # values are returned exactly as Array#[] would return them.
  def [](*args)
    value = super(*args)
    index = args.first
    return value unless value.nil? && args.size == 1 && index.is_a?(Integer)

    self.class.new.tap { |element| self[index] = element }
  end
end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: greeb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease: !!null
6
+ platform: ruby
7
+ authors:
8
+ - Dmitry A. Ustalov
9
+ autorequire: !!null
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-02-06 00:00:00.000000000 +05:00
13
+ default_executable: !!null
14
+ dependencies: []
15
+ description: Greeb is a Graphematical Analyzer, written in Ruby.
16
+ email:
17
+ - dmitry@eveel.ru
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - Gemfile
24
+ - README
25
+ - Rakefile
26
+ - greeb-test.rb
27
+ - greeb.gemspec
28
+ - lib/enumerable.rb
29
+ - lib/greeb.rb
30
+ - lib/greeb/version.rb
31
+ - lib/meta_array.rb
32
+ has_rdoc: true
33
+ homepage: https://github.com/eveel/greeb
34
+ licenses: []
35
+ post_install_message: !!null
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project: greeb
53
+ rubygems_version: 1.5.0
54
+ signing_key: !!null
55
+ specification_version: 3
56
+ summary: Greeb is a Graphematical Analyzer.
57
+ test_files: []