greeb 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,35 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## RUBINIUS
17
+ *.rbc
18
+
19
+ ## NETBEANS
20
+ nbproject
21
+
22
+ ## REDCAR
23
+ .redcar
24
+
25
+ ## RVM
26
+ .rvmrc
27
+
28
+ ## BUNDLER
29
+ .bundle
30
+
31
+ ## PROJECT::GENERAL
32
+ coverage
33
+ pkg
34
+
35
+ ## PROJECT::SPECIFIC
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit
# (the plain http:// endpoint offers no such protection).
source 'https://rubygems.org'

# Hand dependency declaration over to the project's gemspec.
gemspec
data/README ADDED
File without changes
@@ -0,0 +1,4 @@
1
+ # encoding: utf-8
2
+
3
# Load Bundler, then let its gem helper install its tasks.
require 'bundler'

Bundler::GemHelper.install_tasks
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'rubygems'
5
+ require 'graphviz'
6
+
7
+ $:.unshift('./lib')
8
+ require 'greeb'
9
+
10
+ origin = <<-END
11
+ - Сынок, чего это от тебя зигами пахнет,
12
+ опять на Манежную площадь ходил?
13
+
14
+ - Нет мама, я в метро ехал, там назиговано было!!
15
+
16
+
17
+
18
+ Четырнадцать, восемьдесять восемь: 14/88.
19
+ Вот так блять
20
+ END
21
+ origin.chomp!
22
+
23
# Names the Greeb token class that +token+ belongs to.
#
# The patterns are tried in the order listed below and the first one that
# matches wins, so a token that would satisfy several patterns is reported
# under the earliest of them.
#
# token - the value to classify.
#
# Returns the class name as a String, or '?!' when no pattern matches.
def identify(token)
  token_classes = [
    [ Greeb::RU_LEX, 'RU_LEX' ],
    [ Greeb::EN_LEX, 'EN_LEX' ],
    [ Greeb::EOL,    'EOL' ],
    [ Greeb::SEP,    'SEP' ],
    [ Greeb::PUN,    'PUN' ],
    [ Greeb::SPUN,   'SPUN' ],
    [ Greeb::DIG,    'DIG' ],
    [ Greeb::DIL,    'DIL' ]
  ]

  # Same matching operator a `case`/`when` would apply.
  hit = token_classes.find { |pattern, _name| pattern === token }
  hit ? hit.last : '?!'
end
37
+
38
# Parse the sample text; the tree is nested as
# paragraphs -> sentences -> subsentences -> tokens.
parser = Greeb::Parser.new(origin)
text = parser.tree

graph = GraphViz.new('graphematics', 'type' => 'graph')

# Default attributes applied to every node.
{
  :color     => '#ddaa66',
  :style     => 'filled',
  :shape     => 'box',
  :penwidth  => '1',
  :fontname  => 'PT Sans',
  :fontsize  => '8',
  :fillcolor => '#ffeecc',
  :fontcolor => '#775500',
  :margin    => '0.0'
}.each { |attribute, value| graph.node[attribute] = value }

# Default attributes applied to every edge.
{
  :color     => '#999999',
  :weight    => '1',
  :fontname  => 'PT Sans',
  :fontcolor => '#444444',
  :fontsize  => '6',
  :dir       => 'forward',
  :arrowsize => '0.5'
}.each { |attribute, value| graph.edge[attribute] = value }

# Terminal nodes marking the start and the end of the text.
start_id = 'begin'
start_node = graph.add_node(start_id)
start_node.label = "Начало\nтекста"
start_node.shape = 'ellipse'
start_node.style = ''

finish_id = 'end'
finish_node = graph.add_node(finish_id)
finish_node.label = "Конец\nтекста"
finish_node.shape = 'ellipse'
finish_node.style = ''

# Attach a unique node id and the positional index to every level of the
# tree. Single-space tokens are left out of the graph, but the remaining
# tokens keep the index they had before the spaces were removed.
labelled = text.each_with_index.map do |paragraph, i|
  pid = "p#{i}"
  sentences = paragraph.each_with_index.map do |sentence, j|
    sid = "#{pid}s#{j}"
    subsentences = sentence.each_with_index.map do |subsentence, k|
      ssid = "#{sid}ss#{k}"
      tokens = subsentence.each_with_index.map do |token, l|
        ' ' == token ? nil : [ "#{ssid}t#{l}", token, l ]
      end
      [ ssid, tokens.compact, k ]
    end
    [ sid, subsentences, j ]
  end
  [ pid, sentences, i ]
end

# Emit nodes and edges level by level.
labelled.each do |pid, sentences, i|
  paragraph_node = graph.add_node(pid)
  paragraph_node.label = "Абзац\n№#{i + 1}"
  paragraph_node.shape = 'ellipse'
  graph.add_edge(start_id, pid)

  sentences.each do |sid, subsentences, j|
    sentence_node = graph.add_node(sid)
    sentence_node.label = "Предложение\n№#{j + 1}"
    sentence_node.shape = 'ellipse'
    graph.add_edge(pid, sid)

    subsentences.each do |ssid, tokens, k|
      subsentence_node = graph.add_node(ssid)
      subsentence_node.label = "Подпредложение\n№#{k + 1}"
      subsentence_node.shape = 'ellipse'
      graph.add_edge(sid, ssid)

      # Each token hangs off its subsentence (the edge is labelled with the
      # token class) and is also connected to the end-of-text node.
      tokens.each do |tid, token, _l|
        graph.add_node(tid).label = token
        graph.add_edge(ssid, tid).label = identify(token)
        graph.add_edge(tid, finish_id)
      end

      # Dashed edges between neighbouring tokens of one subsentence.
      tokens.each_cons(2) do |(tid1, _token1, _l1), (tid2, _token2, _l2)|
        link = graph.add_edge(tid1, tid2)
        link.weight = 0.25
        link.style = 'dashed'
      end
    end

    # Dashed edges from the last token of a subsentence to the first token
    # of the subsentence that follows it.
    subsentences.each_cons(2) do |(_ssid1, tokens1, _k1), (_ssid2, tokens2, _k2)|
      tid1, = tokens1.last
      tid2, = tokens2.first
      link = graph.add_edge(tid1, tid2)
      link.weight = 0.5
      link.style = 'dashed'
    end
  end
end

graph.output(:output => 'png', :file => 'graph.png')
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
+ $:.push File.expand_path('../lib', __FILE__)
4
+ require 'greeb/version'
5
+
6
# Gem specification for greeb. The version is read from Greeb::VERSION and
# the file lists are taken from the output of `git ls-files`.
Gem::Specification.new do |spec|
  spec.name        = 'greeb'
  spec.version     = Greeb::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = [ 'Dmitry A. Ustalov' ]
  spec.email       = [ 'dmitry@eveel.ru' ]
  spec.homepage    = 'https://github.com/eveel/greeb'
  spec.summary     = 'Greeb is a Graphematical Analyzer.'
  spec.description = 'Greeb is a Graphematical Analyzer, written in Ruby.'

  spec.rubyforge_project = 'greeb'

  tracked_files = `git ls-files`.split("\n")
  tracked_tests = `git ls-files -- {test,spec,features}/*`.split("\n")
  tracked_bins  = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.test_files    = tracked_tests
  # Executables are listed by file name only, without the bin/ prefix.
  spec.executables   = tracked_bins.map { |path| File.basename(path) }
  spec.require_paths = [ 'lib' ]
end
@@ -0,0 +1,8 @@
1
+ # encoding: utf-8
2
+
3
module Enumerable
  # Maps every element together with a running counter.
  #
  # i - the counter value *before* the first element; it is incremented
  #     ahead of each yield, so with the default of -1 the first element
  #     is yielded with index 0.
  #
  # Yields each element and its counter value.
  # Returns an Array of the block's results.
  def collect_with_index(i = -1)
    counter = i
    map do |element|
      counter += 1
      yield(element, counter)
    end
  end
  alias map_with_index collect_with_index
end
@@ -0,0 +1,144 @@
1
+ # encoding: utf-8
2
+
3
+ require 'meta_array'
4
+ require 'enumerable'
5
+
6
# Graphematical analysis: splits plain text into paragraphs, sentences,
# subsentences and tokens.
module Greeb
  # Token classes. Every pattern is anchored with \A and \z so that it has to
  # match the whole string (^ and $ only anchor to line boundaries in Ruby).
  RU_LEX = /\A[А-Яа-яЁё]+\z/u              # Cyrillic word; Ё/ё lie outside А-я
  EN_LEX = /\A[A-Za-z]+\z/u                # Latin word
  EOL    = /\A\n+\z/u                      # line break(s)
  SEP    = /\A[*=_\/\\ ]\z/u               # single separator character
  PUN    = /\A(\.|\!|\?)\z/u               # punctuation that ends a sentence
  SPUN   = /\A(\,|\[|\]|\(|\)|\-|:|;)\z/u  # punctuation that ends a subsentence
  DIG    = /\A[0-9]+\z/u                   # digits only
  DIL    = /\A[А-Яа-яЁёA-Za-z0-9]+\z/u     # run of letters and/or digits
  EMPTY  = ''.freeze

  # Builds the graphematical tree of a text.
  class Parser
    attr_accessor :origin
    private :origin=

    attr_writer :tree
    private :tree=

    # origin - the String to analyze.
    def initialize(origin)
      self.origin = origin
    end

    # Returns the parsed tree, an Array nested as
    # paragraphs -> sentences -> subsentences -> tokens (Strings).
    # The text is parsed on the first call and the result is memoized.
    def tree
      @tree ||= parse(origin)
    end

    private

    # Walks +origin+ character by character, accumulating the current token
    # and flushing it into the tree at boundaries:
    #
    # * a line break after a token flushes the token; two line breaks in a
    #   row start a new paragraph;
    # * a line break followed directly by a letter or digit is recorded as
    #   a single ' ' token;
    # * a SEP, PUN or SPUN character flushes the pending token followed by
    #   the character itself; PUN then starts a new sentence and SPUN a new
    #   subsentence. With no pending token the character is ignored, which
    #   also collapses runs such as "!!" or repeated spaces;
    # * letters and digits extend the pending token;
    # * any other character is skipped.
    #
    # Nothing is written to standard output, and no empty token is appended
    # when the text is empty or ends right after punctuation.
    def parse(origin) # :nodoc:
      tree = MetaArray.new
      p_id = s_id = ss_id = 0 # paragraph, sentence and subsentence indexes
      token = ''.dup

      # Appends to the subsentence the indexes currently point at; the
      # MetaArray creates missing levels on access.
      push = lambda { |item| tree[p_id][s_id][ss_id] << item }

      origin.each_char do |c|
        case c
        when EOL
          if token.empty?
            token << c
          elsif token =~ EOL
            # Second line break in a row: paragraph boundary.
            token = ''.dup
            p_id += 1
            s_id = ss_id = 0
          else
            push.call(token)
            token = c
          end
        when SEP, PUN, SPUN
          next if token.empty?

          push.call(token)
          push.call(c)
          token = ''.dup
          if c =~ PUN
            s_id += 1
            ss_id = 0
          elsif c =~ SPUN
            ss_id += 1
          end
        when RU_LEX, EN_LEX, DIG
          if token =~ EOL
            # A lone line break inside a sentence acts as a space.
            push.call(' ')
            token = c
          else
            token << c
          end
        end
      end

      push.call(token) unless token.empty?
      # Paragraph indexes can skip values on long runs of line breaks,
      # which leaves nil gaps at the top level.
      tree.delete(nil)
      tree.to_a
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+
3
module Greeb
  # Version of the gem. Frozen so the shared constant cannot be mutated
  # in place by accident.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,11 @@
1
+ # encoding: utf-8
2
+
3
# Array whose missing elements spring into existence on first access:
# reading an index that holds nothing stores a new, empty instance of the
# same class at that index and returns it, so nested structures such as
# tree[1][0][2] << item can be built without any setup.
class MetaArray < Array
  # Element reference with auto-creation.
  #
  # Accepts every argument form of Array#[]. A new element is created only
  # for a single index argument whose slot is nil or false; the
  # (start, length) and range forms just return what Array#[] returns.
  #
  # Raises IndexError when a negative index points before the start of the
  # array, because the new element cannot be stored there.
  def [](*args)
    found = super
    return found if found

    # Slices and ranges must not create anything: there is no single slot
    # that a new element could be assigned to.
    return found unless args.size == 1 && args.first.respond_to?(:to_int)

    self.class.new.tap { |element| self[args.first] = element }
  end
end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: greeb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease: !!null
6
+ platform: ruby
7
+ authors:
8
+ - Dmitry A. Ustalov
9
+ autorequire: !!null
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-02-06 00:00:00.000000000 +05:00
13
+ default_executable: !!null
14
+ dependencies: []
15
+ description: Greeb is a Graphematical Analyzer, written in Ruby.
16
+ email:
17
+ - dmitry@eveel.ru
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - Gemfile
24
+ - README
25
+ - Rakefile
26
+ - greeb-test.rb
27
+ - greeb.gemspec
28
+ - lib/enumerable.rb
29
+ - lib/greeb.rb
30
+ - lib/greeb/version.rb
31
+ - lib/meta_array.rb
32
+ has_rdoc: true
33
+ homepage: https://github.com/eveel/greeb
34
+ licenses: []
35
+ post_install_message: !!null
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project: greeb
53
+ rubygems_version: 1.5.0
54
+ signing_key: !!null
55
+ specification_version: 3
56
+ summary: Greeb is a Graphematical Analyzer.
57
+ test_files: []