rley 0.7.00 → 0.7.01
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +51 -34
- data/.travis.yml +10 -9
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -1
- data/appveyor.yml +10 -8
- data/examples/NLP/benchmark_pico_en.rb +3 -2
- data/examples/NLP/engtagger.rb +23 -12
- data/examples/NLP/nano_eng/nano_en_demo.rb +4 -3
- data/examples/NLP/pico_en_demo.rb +3 -2
- data/examples/data_formats/JSON/json_ast_nodes.rb +3 -0
- data/examples/data_formats/JSON/json_demo.rb +1 -0
- data/examples/data_formats/JSON/json_lexer.rb +2 -1
- data/lib/rley/base/dotted_item.rb +2 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +8 -7
- data/lib/rley/gfg/grm_flow_graph.rb +2 -0
- data/lib/rley/gfg/item_vertex.rb +2 -0
- data/lib/rley/gfg/vertex.rb +2 -1
- data/lib/rley/lexical/token.rb +5 -4
- data/lib/rley/parse_forest_visitor.rb +7 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +1 -1
- data/lib/rley/parse_rep/parse_rep_creator.rb +2 -2
- data/lib/rley/parse_rep/parse_tree_builder.rb +1 -0
- data/lib/rley/parse_tree_visitor.rb +2 -0
- data/lib/rley/parser/error_reason.rb +8 -6
- data/lib/rley/parser/gfg_chart.rb +5 -5
- data/lib/rley/parser/gfg_parsing.rb +10 -5
- data/lib/rley/parser/parse_entry_tracker.rb +1 -0
- data/lib/rley/parser/parse_state.rb +2 -1
- data/lib/rley/parser/parse_state_tracker.rb +1 -0
- data/lib/rley/parser/parse_walker_factory.rb +7 -1
- data/lib/rley/ptree/parse_tree_node.rb +1 -0
- data/lib/rley/sppf/parse_forest.rb +9 -7
- data/lib/rley/syntax/grammar.rb +10 -6
- data/lib/rley/syntax/grammar_builder.rb +2 -2
- data/lib/rley/syntax/grm_symbol.rb +1 -0
- data/lib/support/base_tokenizer.rb +10 -96
- data/spec/rley/engine_spec.rb +3 -3
- data/spec/rley/gfg/grm_flow_graph_spec.rb +1 -0
- data/spec/rley/parse_forest_visitor_spec.rb +63 -38
- data/spec/rley/parse_rep/groucho_spec.rb +9 -8
- data/spec/rley/parse_tree_visitor_spec.rb +1 -1
- data/spec/rley/parser/gfg_earley_parser_spec.rb +7 -7
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -3
- data/spec/rley/parser/parse_entry_spec.rb +1 -1
- data/spec/rley/support/expectation_helper.rb +2 -1
- data/spec/rley/support/grammar_ambig01_helper.rb +4 -3
- data/spec/rley/support/grammar_arr_int_helper.rb +5 -4
- data/spec/rley/support/grammar_b_expr_helper.rb +5 -4
- data/spec/rley/support/grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_l0_helper.rb +3 -2
- data/spec/rley/support/grammar_pb_helper.rb +5 -28
- data/spec/support/base_tokenizer_spec.rb +7 -9
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1d453e82683bb3a51986dad86b0b34d100fd4c27
|
|
4
|
+
data.tar.gz: bce3fa7704cb65670102ecfcb78ee762bdad7ba5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bfba908cc187a280ed9a236414cd79ff9880abc96997acdb8c33a4c36f39731ca9b2d4c99be6e2adfb45ef053bd275ba052d8a2d61e851f6607b9ebd2248d1c8
|
|
7
|
+
data.tar.gz: 1b20d6ebe85f9d174a7ce8e2a5729e7eaa09fffa0550eb6e586592dd24d4b3cc0c309139b1d2368178309a2ed1eb411c3e07e05509152491961248c8a61492ea
|
data/.rubocop.yml
CHANGED
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
AllCops:
|
|
2
2
|
Exclude:
|
|
3
3
|
- 'features/**/*'
|
|
4
|
-
- 'exp/**/*'
|
|
4
|
+
- 'exp/**/*'
|
|
5
5
|
- 'gems/**/*'
|
|
6
6
|
- 'refs/**/*'
|
|
7
|
-
|
|
7
|
+
|
|
8
8
|
# This is disabled because some demos use UTF-8
|
|
9
9
|
AsciiComments:
|
|
10
10
|
Enabled: false
|
|
11
|
-
|
|
11
|
+
|
|
12
12
|
Attr:
|
|
13
13
|
Enabled: false
|
|
14
|
-
|
|
14
|
+
|
|
15
15
|
BlockComments:
|
|
16
16
|
Enabled: false
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
CaseIndentation:
|
|
19
19
|
EnforcedStyle: end
|
|
20
20
|
IndentOneStep: true
|
|
@@ -23,89 +23,106 @@ CaseIndentation:
|
|
|
23
23
|
# Which is contrary to modelling practice.
|
|
24
24
|
ClassCheck:
|
|
25
25
|
Enabled: false
|
|
26
|
-
|
|
26
|
+
|
|
27
27
|
ClassLength:
|
|
28
28
|
Max: 250
|
|
29
|
-
CountComments: false
|
|
29
|
+
CountComments: false
|
|
30
30
|
|
|
31
|
-
ConstantName:
|
|
31
|
+
ConstantName:
|
|
32
32
|
Enabled: false
|
|
33
|
-
|
|
33
|
+
|
|
34
34
|
CyclomaticComplexity:
|
|
35
35
|
Enabled: false
|
|
36
|
-
|
|
37
|
-
DefWithParentheses:
|
|
36
|
+
|
|
37
|
+
DefWithParentheses:
|
|
38
38
|
Enabled: false
|
|
39
|
-
|
|
39
|
+
|
|
40
40
|
Documentation:
|
|
41
41
|
Enabled: false
|
|
42
|
-
|
|
42
|
+
|
|
43
43
|
EmptyLines:
|
|
44
|
-
Enabled: false
|
|
44
|
+
Enabled: false
|
|
45
45
|
|
|
46
46
|
Encoding:
|
|
47
47
|
Enabled: false
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
EndOfLine:
|
|
50
50
|
Enabled: false
|
|
51
51
|
# SupportedStyles: lf
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
IndentationWidth
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
IndentationWidth:
|
|
55
55
|
Enabled: false
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
Layout/BlockAlignment:
|
|
58
|
+
Enabled: false
|
|
59
|
+
|
|
60
|
+
Layout/ClosingHeredocIndentation:
|
|
59
61
|
Enabled: false
|
|
60
62
|
|
|
61
63
|
# Enabled after end of support of Rubies < 2.3
|
|
62
64
|
Layout/IndentHeredoc:
|
|
63
65
|
Enabled: false
|
|
64
66
|
|
|
67
|
+
Layout/SpaceInsideArrayLiteralBrackets:
|
|
68
|
+
Enabled: false
|
|
69
|
+
|
|
65
70
|
Metrics/AbcSize:
|
|
66
71
|
Max: 50
|
|
67
|
-
|
|
72
|
+
|
|
68
73
|
# Avoid methods longer than 50 lines of code
|
|
69
74
|
Metrics/MethodLength:
|
|
70
75
|
Max: 50
|
|
71
|
-
CountComments: false
|
|
76
|
+
CountComments: false
|
|
72
77
|
|
|
73
|
-
# Avoid modules longer than 200 lines of code
|
|
78
|
+
# Avoid modules longer than 200 lines of code
|
|
74
79
|
Metrics/ModuleLength:
|
|
75
80
|
CountComments: false
|
|
76
|
-
Max: 200
|
|
81
|
+
Max: 200
|
|
77
82
|
|
|
78
83
|
Metrics/PerceivedComplexity:
|
|
79
84
|
Enabled: true
|
|
80
85
|
Max: 50
|
|
81
86
|
|
|
87
|
+
# Disable this because it produces false negatives
|
|
88
|
+
Naming/HeredocDelimiterNaming:
|
|
89
|
+
Enabled: false
|
|
90
|
+
|
|
82
91
|
Naming/MethodName:
|
|
83
92
|
Enabled: false
|
|
84
|
-
|
|
93
|
+
|
|
94
|
+
Naming/UncommunicativeMethodParamName:
|
|
95
|
+
Enabled: false
|
|
96
|
+
|
|
85
97
|
NonNilCheck:
|
|
86
98
|
Enabled: false
|
|
87
99
|
|
|
88
100
|
NumericLiterals:
|
|
89
101
|
Enabled: false
|
|
90
|
-
|
|
102
|
+
|
|
91
103
|
RaiseArgs:
|
|
92
104
|
Enabled: false
|
|
93
|
-
|
|
105
|
+
|
|
94
106
|
RedundantReturn:
|
|
95
107
|
Enabled: false
|
|
96
108
|
|
|
97
|
-
|
|
109
|
+
Style/CommentedKeyword:
|
|
110
|
+
Enabled: false
|
|
111
|
+
|
|
112
|
+
Style/ConditionalAssignment:
|
|
113
|
+
Enabled: false
|
|
114
|
+
|
|
115
|
+
Style/Lambda:
|
|
116
|
+
Enabled: false
|
|
117
|
+
|
|
118
|
+
Style/MissingRespondToMissing:
|
|
98
119
|
Enabled: false
|
|
99
120
|
|
|
100
121
|
TrailingWhitespace:
|
|
101
122
|
Enabled: false
|
|
102
|
-
|
|
123
|
+
|
|
103
124
|
VariableName:
|
|
104
125
|
Enabled: false
|
|
105
126
|
|
|
106
127
|
VariableNumber:
|
|
107
|
-
Enabled: false
|
|
108
|
-
|
|
109
|
-
Style/CommentedKeyword:
|
|
110
|
-
Enabled: false
|
|
111
|
-
|
|
128
|
+
Enabled: false
|
data/.travis.yml
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
language: ruby
|
|
2
2
|
rvm:
|
|
3
|
-
- 2.
|
|
3
|
+
- 2.6.0
|
|
4
|
+
- 2.5.3
|
|
5
|
+
- 2.4.5
|
|
6
|
+
- 2.3.8
|
|
7
|
+
- 2.2.10
|
|
4
8
|
- 2.1.10
|
|
5
|
-
- 2.
|
|
6
|
-
- 2.3.6
|
|
7
|
-
- 2.4.2
|
|
8
|
-
- 2.5.0
|
|
9
|
+
- 2.0.0-p648
|
|
9
10
|
- ruby-head
|
|
10
|
-
- jruby-9.1.
|
|
11
|
-
- jruby-head
|
|
11
|
+
- jruby-9.1.9.0
|
|
12
|
+
- jruby-head
|
|
12
13
|
matrix:
|
|
13
14
|
allow_failures:
|
|
14
15
|
- rvm: ruby-head
|
|
@@ -16,8 +17,8 @@ matrix:
|
|
|
16
17
|
|
|
17
18
|
gemfile:
|
|
18
19
|
- Gemfile
|
|
19
|
-
|
|
20
|
+
|
|
20
21
|
# whitelist
|
|
21
|
-
branches:
|
|
22
|
+
branches:
|
|
22
23
|
only:
|
|
23
24
|
- master
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
### 0.7.01 / 2019-01-03
|
|
2
|
+
- Maintenance release.
|
|
3
|
+
|
|
4
|
+
* [CHANGE] Code re-styling to please Rubocop 0.62.0.
|
|
5
|
+
* [CHANGE] File `.travis.yml`: updated Ruby versions.
|
|
6
|
+
* [CHANGE] File `appveyor.yml` updated Ruby versions.
|
|
7
|
+
* [CHANGE] File `README.md`: removed obsolete icon.
|
|
8
|
+
* [CHANGE] File `LICENSE.txt` Updated copyright years.
|
|
9
|
+
|
|
1
10
|
### 0.7.00 / 2018-11-24
|
|
2
11
|
- Version bump. Core class `Token` is changed.
|
|
3
12
|
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
[](https://ci.appveyor.com/project/famished-tiger/rley)
|
|
5
5
|
[](https://coveralls.io/r/famished-tiger/Rley?branch=master)
|
|
6
6
|
[](http://badge.fury.io/rb/rley)
|
|
7
|
-
[](https://gemnasium.com/famished-tiger/Rley)
|
|
8
7
|
[](http://inch-ci.org/github/famished-tiger/Rley)
|
|
9
8
|
[](https://github.com/famished-tiger/SRL-Ruby/blob/master/LICENSE.txt)
|
|
10
9
|
|
data/appveyor.yml
CHANGED
|
@@ -2,16 +2,18 @@ version: '{build}'
|
|
|
2
2
|
max_jobs: 3
|
|
3
3
|
environment:
|
|
4
4
|
matrix:
|
|
5
|
-
|
|
6
|
-
- Ruby_version:
|
|
7
|
-
- Ruby_version: 21
|
|
8
|
-
- Ruby_version: 21-x64
|
|
9
|
-
- Ruby_version: 22
|
|
10
|
-
- Ruby_version: 22-x64
|
|
11
|
-
- Ruby_version: 23
|
|
5
|
+
#- Ruby_version: 25-x64
|
|
6
|
+
- Ruby_version: 24-x64
|
|
12
7
|
- Ruby_version: 23-x64
|
|
8
|
+
- Ruby_version: 22-x64
|
|
9
|
+
- Ruby_version: 21-x64
|
|
10
|
+
- Ruby_version: 200-x64
|
|
11
|
+
#- Ruby_version: 25
|
|
13
12
|
- Ruby_version: 24
|
|
14
|
-
- Ruby_version:
|
|
13
|
+
- Ruby_version: 23
|
|
14
|
+
- Ruby_version: 22
|
|
15
|
+
- Ruby_version: 21
|
|
16
|
+
- Ruby_version: 200
|
|
15
17
|
|
|
16
18
|
install:
|
|
17
19
|
- set PATH=C:\Ruby%Ruby_version%\bin;%PATH%
|
|
@@ -5,7 +5,7 @@ require 'rley' # Load Rley library
|
|
|
5
5
|
|
|
6
6
|
########################################
|
|
7
7
|
# Step 0. Instantiate facade object of Rley library.
|
|
8
|
-
# It provides a unified, higher-level interface
|
|
8
|
+
# It provides a unified, higher-level interface
|
|
9
9
|
engine = Rley::Engine.new
|
|
10
10
|
|
|
11
11
|
########################################
|
|
@@ -67,8 +67,9 @@ def tokenizer(aTextToParse)
|
|
|
67
67
|
tokens = aTextToParse.scan(/\S+/).map do |word|
|
|
68
68
|
term_name = Lexicon[word]
|
|
69
69
|
raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
|
|
70
|
+
|
|
70
71
|
pos = Rley::Lexical::Position.new(1, offset + 1)
|
|
71
|
-
offset += word.length
|
|
72
|
+
offset += word.length
|
|
72
73
|
Rley::Lexical::Token.new(word, term_name, pos)
|
|
73
74
|
end
|
|
74
75
|
|
data/examples/NLP/engtagger.rb
CHANGED
|
@@ -2,12 +2,13 @@ require 'rley'
|
|
|
2
2
|
require 'engtagger' # Load POS (Part-Of-Speech) tagger EngTagger
|
|
3
3
|
|
|
4
4
|
# REGEX to remove XML tags from Engtagger output
|
|
5
|
-
GET_TAG = /<(.+?)>(.*?)
|
|
5
|
+
GET_TAG = /<(.+?)>(.*?)<.+?>/.freeze
|
|
6
6
|
|
|
7
7
|
# Text tokenizer
|
|
8
8
|
# Taken directly from Engtagger, will ensure uniform indexing while parsing
|
|
9
9
|
def clean_text(text)
|
|
10
10
|
return false unless valid_text(text)
|
|
11
|
+
|
|
11
12
|
text = text.toutf8
|
|
12
13
|
cleaned_text = text
|
|
13
14
|
tokenized = []
|
|
@@ -48,13 +49,14 @@ def split_sentences(array)
|
|
|
48
49
|
va wash wis wisc wy wyo usafa alta man ont que sask yuk]
|
|
49
50
|
month = %w[jan feb mar apr may jun jul aug sep sept oct nov dec]
|
|
50
51
|
misc = %w[vs etc no esp]
|
|
51
|
-
abbr =
|
|
52
|
+
abbr = {}
|
|
52
53
|
[people, army, inst, place, comp, state, month, misc].flatten.each do |i|
|
|
53
54
|
abbr[i] = true
|
|
54
55
|
end
|
|
55
|
-
words =
|
|
56
|
+
words = []
|
|
56
57
|
tokenized.each_with_index do |_t, i|
|
|
57
|
-
if tokenized[i + 1] &&
|
|
58
|
+
if tokenized[i + 1] &&
|
|
59
|
+
tokenized [i + 1] =~ /[A-Z\W]/ && tokenized[i] =~ /\A(.+)\.\z/
|
|
58
60
|
w = $1
|
|
59
61
|
# Don't separate the period off words that
|
|
60
62
|
# meet any of the following conditions:
|
|
@@ -62,8 +64,9 @@ def split_sentences(array)
|
|
|
62
64
|
# 1. It is defined in one of the lists above
|
|
63
65
|
# 2. It is only one letter long: Alfred E. Sloan
|
|
64
66
|
# 3. It has a repeating letter-dot: U.S.A. or J.C. Penney
|
|
65
|
-
unless abbr[w.downcase] ||
|
|
66
|
-
|
|
67
|
+
unless abbr[w.downcase] ||
|
|
68
|
+
w =~ /\A[a-z]\z/i || w =~ /[a-z](?:\.[a-z])+\z/i
|
|
69
|
+
words << w
|
|
67
70
|
words << '.'
|
|
68
71
|
next
|
|
69
72
|
end
|
|
@@ -83,15 +86,20 @@ end
|
|
|
83
86
|
def split_punct(text)
|
|
84
87
|
# If there's no punctuation, return immediately
|
|
85
88
|
return [text] if /\A\w+\z/ =~ text
|
|
89
|
+
|
|
86
90
|
# Sanity checks
|
|
87
91
|
text = text.gsub(/\W{10,}/o, ' ')
|
|
88
92
|
|
|
89
93
|
# Put quotes into a standard format
|
|
90
94
|
text = text.gsub(/`(?!`)(?=.*\w)/o, '` ') # Shift left quotes off text
|
|
91
95
|
text = text.gsub(/"(?=.*\w)/o, ' `` ') # Convert left quotes to ``
|
|
92
|
-
|
|
96
|
+
|
|
97
|
+
# Convert left quote to `
|
|
98
|
+
text = text.gsub(/(\W|^)'(?=.*\w)/o) { $1 ? $1 + ' ` ' : ' ` ' }
|
|
93
99
|
text = text.gsub(/"/, " '' ") # Convert (remaining) quotes to ''
|
|
94
|
-
|
|
100
|
+
|
|
101
|
+
# Separate right single quotes
|
|
102
|
+
text = text.gsub(/(\w)'(?!')(?=\W|$)/o, "\\1 ' ")
|
|
95
103
|
|
|
96
104
|
# Handle all other punctuation
|
|
97
105
|
text = text.gsub(/--+/o, ' - ') # Convert and separate dashes
|
|
@@ -99,10 +107,13 @@ def split_punct(text)
|
|
|
99
107
|
text = text.gsub(/:/o, ' :') # Shift colon off
|
|
100
108
|
text = text.gsub(/(\.\.\.+)/o, ' \1 ') # Shift ellipses off
|
|
101
109
|
text = text.gsub(/([\(\[\{\}\]\)])/o, ' \1 ') # Shift off brackets
|
|
102
|
-
|
|
110
|
+
|
|
111
|
+
# Shift off other ``standard'' punctuation
|
|
112
|
+
text = text.gsub(/([\!\?#\$%;~|])/o, ' \1 ')
|
|
103
113
|
|
|
104
114
|
# English-specific contractions
|
|
105
|
-
|
|
115
|
+
# Separate off 'd 'm 's
|
|
116
|
+
text = text.gsub(/([A-Za-z])'([dms])\b/o, "\\1 '\\2")
|
|
106
117
|
text = text.gsub(/n't\b/o, " n't") # Separate off n't
|
|
107
118
|
text = text.gsub(/'(ve|ll|re)\b/o, " '\\1") # Separate off 've, 'll, 're
|
|
108
119
|
result = text.split(' ')
|
|
@@ -139,7 +150,7 @@ tgr = EngTagger.new
|
|
|
139
150
|
tagged = tgr.add_tags(text)
|
|
140
151
|
|
|
141
152
|
# Generate tokenized lexicon of input text
|
|
142
|
-
# Instead of creating a lexicon dictionary,
|
|
153
|
+
# Instead of creating a lexicon dictionary,
|
|
143
154
|
# we would simply generate one each time on the fly for the current text only.
|
|
144
155
|
lexicon = clean_text(text)
|
|
145
156
|
|
|
@@ -153,7 +164,7 @@ def tokenizer(lexicon, tokens)
|
|
|
153
164
|
term_name = tokens[i].last
|
|
154
165
|
rank = Rley::Lexical::Position.new(1, pos + 1)
|
|
155
166
|
pos += word.length + 1 # Assuming one space between words.
|
|
156
|
-
rley_tokens << Rley::Lexical::Token.new(word, term_name,
|
|
167
|
+
rley_tokens << Rley::Lexical::Token.new(word, term_name, rank)
|
|
157
168
|
end
|
|
158
169
|
return rley_tokens
|
|
159
170
|
end
|
|
@@ -68,10 +68,10 @@ Lexicon = {
|
|
|
68
68
|
# Step 4. Creating a tokenizer
|
|
69
69
|
# A tokenizer reads the input string and converts it into a sequence of tokens
|
|
70
70
|
# Highly simplified tokenizer implementation.
|
|
71
|
-
def tokenizer(aTextToParse)
|
|
71
|
+
def tokenizer(aTextToParse)
|
|
72
72
|
scanner = StringScanner.new(aTextToParse)
|
|
73
73
|
tokens = []
|
|
74
|
-
|
|
74
|
+
|
|
75
75
|
loop do
|
|
76
76
|
scanner.skip(/\s+/)
|
|
77
77
|
curr_pos = scanner.pos
|
|
@@ -80,11 +80,12 @@ def tokenizer(aTextToParse)
|
|
|
80
80
|
|
|
81
81
|
term_name = Lexicon[word]
|
|
82
82
|
raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
|
|
83
|
+
|
|
83
84
|
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
|
84
85
|
tokens << Rley::Lexical::Token.new(word, term_name, pos)
|
|
85
86
|
end
|
|
86
87
|
|
|
87
|
-
return tokens
|
|
88
|
+
return tokens
|
|
88
89
|
end
|
|
89
90
|
|
|
90
91
|
|
|
@@ -64,7 +64,7 @@ Lexicon = {
|
|
|
64
64
|
def tokenizer(aTextToParse)
|
|
65
65
|
scanner = StringScanner.new(aTextToParse)
|
|
66
66
|
tokens = []
|
|
67
|
-
|
|
67
|
+
|
|
68
68
|
loop do
|
|
69
69
|
scanner.skip(/\s+/)
|
|
70
70
|
curr_pos = scanner.pos
|
|
@@ -73,6 +73,7 @@ def tokenizer(aTextToParse)
|
|
|
73
73
|
|
|
74
74
|
term_name = Lexicon[word]
|
|
75
75
|
raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
|
|
76
|
+
|
|
76
77
|
pos = Rley::Lexical::Position.new(1, curr_pos + 1)
|
|
77
78
|
tokens << Rley::Lexical::Token.new(word, term_name, pos)
|
|
78
79
|
end
|
|
@@ -94,7 +95,7 @@ unless result.success?
|
|
|
94
95
|
puts result.failure_reason.message
|
|
95
96
|
exit(1)
|
|
96
97
|
end
|
|
97
|
-
|
|
98
|
+
|
|
98
99
|
########################################
|
|
99
100
|
# Step 6. Generating a parse tree from parse result
|
|
100
101
|
ptree = engine.to_ptree(result)
|
|
@@ -29,6 +29,7 @@ JSONTerminalNode = Struct.new(:token, :value, :position) do
|
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def done!
|
|
32
|
+
# Do nothing
|
|
32
33
|
end
|
|
33
34
|
end
|
|
34
35
|
|
|
@@ -76,6 +77,7 @@ class JSONCompositeNode
|
|
|
76
77
|
end
|
|
77
78
|
|
|
78
79
|
def done!
|
|
80
|
+
# Do nothing
|
|
79
81
|
end
|
|
80
82
|
|
|
81
83
|
alias subnodes children
|
|
@@ -123,6 +125,7 @@ class JSONPair
|
|
|
123
125
|
end
|
|
124
126
|
|
|
125
127
|
def done!
|
|
128
|
+
# Do nothing
|
|
126
129
|
end
|
|
127
130
|
|
|
128
131
|
def to_ruby
|