pils 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/README.md +39 -0
- data/Rakefile +8 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/pils +3 -0
- data/lib/pils.rb +64 -0
- data/lib/pils/de.rb +24 -0
- data/lib/pils/de/skeleton.rb +207 -0
- data/lib/pils/de/small.rb +128 -0
- data/lib/pils/parsing.rb +31 -0
- data/lib/pils/parsing/cat.rb +62 -0
- data/lib/pils/parsing/grammar.rb +47 -0
- data/lib/pils/parsing/lexicon.rb +100 -0
- data/lib/pils/parsing/parser.rb +310 -0
- data/lib/pils/parsing/rule.rb +43 -0
- data/lib/pils/parsing/tree.rb +147 -0
- data/lib/pils/parsing/wordform.rb +44 -0
- data/lib/pils/structures.rb +7 -0
- data/lib/pils/structures/avm.rb +98 -0
- data/lib/pils/tcf.rb +37 -0
- data/lib/pils/tcf/annotation.rb +42 -0
- data/lib/pils/tcf/bounded_element.rb +46 -0
- data/lib/pils/tcf/geo_annotation.rb +29 -0
- data/lib/pils/tcf/named_entity_annotation.rb +31 -0
- data/lib/pils/tcf/sentence.rb +47 -0
- data/lib/pils/tcf/tcf_document.rb +296 -0
- data/lib/pils/tcf/token.rb +52 -0
- data/lib/pils/tcf/transform/transformer.rb +468 -0
- data/lib/pils/version.rb +3 -0
- data/pils-0.1.2.gem +0 -0
- data/pils.gemspec +41 -0
- data/tasks/testing.rake +23 -0
- metadata +128 -0
@@ -0,0 +1,43 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
module Pils
  module Parsing
    # A production rule with one left-hand side and a list of
    # right-hand-side elements, e.g. "S -> NP VP".
    class Rule
      attr_accessor :grammar, :left, :right

      # @param [Object] new_left the left-hand side of the rule
      # @param [Array] new_right the right-hand-side elements
      # @param [Object] new_grammar value stored as-is in +grammar+
      def initialize(new_left, new_right, new_grammar={})
        @left, @right, @grammar = new_left, new_right, new_grammar
      end

      # Compares the given category against this rule's left-hand side.
      # @param [#<] cat the category to test
      # @return the result of `cat < left`
      def expandible?(cat)
        cat < @left
      end

      # @return [String] the rule rendered as "LEFT -> R1 R2 ...",
      #   built from the +display+ of each element
      def display
        right_side = right.map { |element| element.display }.join(' ')
        "#{left.display} -> #{right_side}"
      end
    end
  end
end
|
41
|
+
|
42
|
+
|
43
|
+
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
module Pils
  module Parsing
    # A node of a parse tree. Every node carries a payload (+obj+), a
    # link to its parent (nil for the root), a list of child nodes and,
    # optionally, a word form.
    class Tree
      attr_accessor :children
      attr_accessor :parent
      attr_accessor :obj
      attr_accessor :wordform

      # @param [Object] new_obj the payload of this node
      # @param [Tree,nil] new_parent the parent node, nil for a root
      # @param [Array<Tree>] new_children the child nodes
      def initialize(new_obj, new_parent=nil, new_children=[])
        @obj = new_obj
        @parent = new_parent
        @children = new_children
        @wordform = nil
      end

      # Creates a deep copy of this subtree. The copy keeps the parent
      # and word form of the original node; every copied child points
      # to its copied parent, so +ancestors+ of a copied node stays
      # inside the copy instead of leading back into the original tree.
      # +obj+ and +wordform+ are shared with the original, not duplicated.
      # @return [Tree] the copied subtree
      def clone
        copy = Tree.new(@obj, @parent, [])
        copy.wordform = @wordform
        children.each do |child|
          child_copy = child.clone
          child_copy.set_parent(copy)
          copy.children << child_copy
        end
        copy
      end

      # @param [Tree,nil] new_parent the new parent node
      def set_parent(new_parent)
        @parent = new_parent
      end

      # Replaces the child list.
      # @param [Array<Tree>] new_children the new child nodes
      # @param [Boolean] recurse when true, each new child gets this
      #   node as its parent
      def set_children(new_children, recurse=false)
        @children = new_children
        @children.each { |child| child.set_parent(self) } if recurse
      end

      # @return [Array<Tree>] all ancestors, nearest parent first
      def ancestors
        return [] if @parent.nil?
        [@parent] + @parent.ancestors
      end

      # @return [Array<Tree>] all ancestors, root first
      def ancestors_from_root
        ancestors.reverse
      end

      # @return [Integer] the number of direct children
      def child_count
        @children.count
      end

      # @return [Boolean] true if this node has no children
      def leaf?
        child_count == 0
      end

      # @return [Integer] the number of leaves below this node;
      #   1 if the node is itself a leaf
      def leaf_count
        return 1 if leaf?
        @children.inject(0) { |total, child| total + child.leaf_count }
      end

      # @return [Array<Tree>] the leaves below this node from left to
      #   right (without duplicates); [self] if the node is a leaf
      def leaves
        return [self] if leaf?
        @children.flat_map { |child| child.leaves }.uniq
      end

      # @param [Integer] n zero-based leaf position
      # @return [Tree,nil] the n-th leaf or nil if there is none
      def leaf_at(n)
        leaves[n]
      end

      # Assigns a word form to the n-th leaf (zero-based, left to right).
      # @param [Integer] n the position of the leaf
      # @param [Object] value the word form to store
      # @return [Integer] the number of leaves that were visited
      def set_wordform_at(n, value)
        set_wordform_for_nth_leaf(n, 0, value)
      end

      # Recursive worker for {#set_wordform_at}.
      # @param [Integer] n the position of the leaf to modify
      # @param [Integer] counter the number of leaves seen so far
      # @param [Object] value the word form to store
      # @return [Integer] +counter+ plus the number of leaves in this
      #   subtree
      def set_wordform_for_nth_leaf(n, counter, value)
        if leaf?
          # Each leaf advances the counter by exactly one, whether or
          # not it is the target, so the count agrees with #leaf_count.
          self.wordform = value if counter == n
          return counter + 1
        end
        @children.each do |child|
          counter = child.set_wordform_for_nth_leaf(n, counter, value)
        end
        counter
      end

      # Renders the subtree in bracket notation: inner nodes and leaves
      # with a word form as "[.LABEL CONTENT ]", bare leaves as their
      # label only.
      # @return [String]
      def display
        if leaf?
          return obj.to_s unless wordform
          return "[.%s %s ]" % [obj.to_s, wordform]
        end
        "[.%s %s ]" % [@obj.to_s, children.collect { |c| c.display }.join(' ')]
      end

      # @return [String] same as {#display}
      def to_s
        display
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
module Pils
  module Parsing
    # A word form: a surface form together with its category and two
    # attribute-value structures (grammar and semantics).
    class Wordform
      attr_accessor :form, :cat, :grammar, :semantics

      # @param [Object] new_form the surface form
      # @param [Cat,Object] new_cat a category; for a Cat instance its
      #   +cat+ value is stored, anything else is stored unchanged
      # @param [Hash] new_grammar entries for the grammar AVM
      # @param [Hash] new_semantics entries for the semantics AVM
      def initialize(new_form, new_cat, new_grammar={}, new_semantics={})
        @form = new_form
        @cat = new_cat.kind_of?(Cat) ? new_cat.cat : new_cat
        @grammar = Pils::Structures::Avm.new(new_grammar)
        @semantics = Pils::Structures::Avm.new(new_semantics)
      end

      # @return [String] "FORM/CAT/SEMANTICS"
      def display
        format("%s/%s/%s", @form, @cat, @semantics)
      end
    end
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# (c) 2019 Peter Menke
|
4
|
+
#
|
5
|
+
# This file is part of pils
|
6
|
+
# ("Programming in linguistic seminars").
|
7
|
+
#
|
8
|
+
# pils is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# pils is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with pils. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
|
21
|
+
module Pils
  module Structures

    # This class models attribute value structures, based on hashes.
    class Avm < Hash

      # @param [Hash,nil] old_struct entries to copy into the new AVM;
      #   nil yields an empty AVM
      def initialize(old_struct={})
        super()
        old_struct.each { |k, v| self[k] = v } if old_struct
      end

      # retrieves the value stored under the given key. The key is
      # tried as given, then as a symbol, then as a string.
      # @param [Object] key the key under which to look
      # @return [Object,nil] the found value or nil if no value was
      #   stored under the given key
      def get(key)
        loose_fetch(self, key).last
      end

      # checks whether every entry of this AVM is also present in the
      # other one with the same key and an equal value.
      # @param [Hash] other the structure to compare with
      # @return [Boolean] true if all entries of this AVM occur in other
      def <(other)
        keys.all? { |k| other.has_key?(k) && self[k] == other[k] }
      end

      # unifies (merges) this AVM with another one
      # @param [Pils::Structures::Avm] other the other AVM to unify
      # @return [Pils::Structures::Avm,nil] the result of the
      #   unification or `nil` if there were conflicts
      def +(other)
        merged = {}
        each { |key, value| merged[key] = value }
        other.keys.each do |key|
          return nil if merged.has_key?(key) && merged[key] != other[key]
          merged[key] = other[key]
        end
        Avm.new(merged)
      end

      # checks whether every entry of this AVM is matched by the other
      # structure. Keys are matched regardless of whether they are
      # strings or symbols; values that are hashes on both sides are
      # compared recursively, all other values with `!=`.
      # @param [Hash] other the structure that is described
      # @return [Boolean] true if no entry of this AVM is missing from
      #   or in conflict with other
      def fits_to_describe(other)
        each do |key, mine|
          # The value is wrapped in an array so that array values are
          # formatted as one argument instead of as the argument list.
          Pils::log " a: %s" % [mine]
          found, theirs = loose_fetch(other, key)
          return false unless found
          Pils::log " b: %s" % [theirs]
          if mine.kind_of?(Hash) && theirs.kind_of?(Hash)
            # Plain nested hashes do not know fits_to_describe.
            nested = mine.kind_of?(Avm) ? mine : Avm.new(mine)
            return false unless nested.fits_to_describe(theirs)
          elsif mine != theirs
            return false
          end
        end
        true
      end

      private

      # looks up key in hash, trying the key itself, its symbol form
      # (if it has one) and its string form, in that order.
      # @return [Array(Boolean, Object)] whether a key matched, and the
      #   value stored under it (nil if none matched)
      def loose_fetch(hash, key)
        candidates = [key]
        candidates << key.to_sym if key.respond_to?(:to_sym)
        candidates << key.to_s
        candidates.each do |candidate|
          return [true, hash[candidate]] if hash.has_key?(candidate)
        end
        [false, nil]
      end

    end
  end
end
|
data/lib/pils/tcf.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# (c) 2019 Peter Menke
|
4
|
+
#
|
5
|
+
# This file is part of pils
|
6
|
+
# ("Programming in linguistic seminars").
|
7
|
+
#
|
8
|
+
# pils is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# pils is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with pils. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
|
21
|
+
# Declares the (empty) Pils::Tcf namespace up front; the
# "pils/tcf/..." files required further down are loaded only after
# this declaration.
module Pils
|
22
|
+
module Tcf
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
require "pils/tcf/annotation"
|
28
|
+
require "pils/tcf/bounded_element"
|
29
|
+
require "pils/tcf/geo_annotation"
|
30
|
+
require "pils/tcf/named_entity_annotation"
|
31
|
+
require "pils/tcf/sentence"
|
32
|
+
require "pils/tcf/tcf_document"
|
33
|
+
require "pils/tcf/token"
|
34
|
+
require "pils/tcf/transform/transformer"
|
35
|
+
|
36
|
+
|
37
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# (c) 2019 Peter Menke
|
4
|
+
#
|
5
|
+
# This file is part of pils
|
6
|
+
# ("Programming in linguistic seminars").
|
7
|
+
#
|
8
|
+
# pils is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# pils is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with pils. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
|
21
|
+
module Pils
  module Tcf

    # An annotation that remembers the document it was created for and
    # collects tokens in insertion order.
    class Annotation

      # @return [Array] the tokens added so far
      attr_reader :tokens

      # @param [Object] tcf_document the document this annotation
      #   belongs to
      def initialize(tcf_document)
        @tcf_document = tcf_document
        @tokens = []
      end

      # Appends a token.
      # @param [Object] token the token to add
      # @return [Array] the token list, so calls can be chained
      def <<(token)
        @tokens.push(token)
      end

    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# (c) 2019 Peter Menke
|
4
|
+
#
|
5
|
+
# This file is part of pils
|
6
|
+
# ("Programming in linguistic seminars").
|
7
|
+
#
|
8
|
+
# pils is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# pils is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU General Public License
|
19
|
+
# along with pils. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
|
21
|
+
module Pils
  module Tcf

    # An element delimited by a begin index and an end index.
    class BoundedElement

      attr_accessor :begin_index
      attr_accessor :end_index

      # Sets both indices at once.
      # @param [#first,#last] new_boundaries an object such as an Array
      #   or Range whose first element becomes the begin index and
      #   whose last element becomes the end index
      def boundaries=(new_boundaries)
        @begin_index = new_boundaries.first
        @end_index = new_boundaries.last
      end

      # @return [Boolean] true if both the begin index and the end
      #   index are set, false otherwise
      def boundaries?
        # Double negation turns the index (or nil) into a real boolean.
        !!(@begin_index && @end_index)
      end

      # @return the difference between the end index and the begin
      #   index; both must be set before calling this
      def length
        end_index - begin_index
      end

    end
  end
end
|