zenlish 0.1.12 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +7 -7
- data/lib/zenlish/lex/empty_lexicon_factory.rb +1 -0
- data/lib/zenlish/parser/zenlish_grammar.rb +8 -0
- data/lib/zenlish/version.rb +1 -1
- data/lib/zenlish/wclasses/all_word_classes.rb +1 -0
- data/lib/zenlish/wclasses/indefinite_article.rb +9 -0
- data/spec/zenlish/parser/zparser_spec.rb +68 -0
- data/spec/zenlish/support/minimal_lexicon.rb +9 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6f916af0c3deb8ecbc0f46cf0ed8638d93e7ce33091070ae6549ca2a29d2b58
|
4
|
+
data.tar.gz: 07b3103e32b1d53afd46492544592d139af39af2730290985c429cc82bcaf34a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52030f24b26543dc6ad52abe9a00ce88407e4e7fb1e6201e9c5533b7f7718e1cb66af8d8281c7a4224c12bd6db7a1825f81b97899f9efaaeef560911671632f1
|
7
|
+
data.tar.gz: 851109870bf5ecfcc65f0385c88c2aa5546a8cd3bbab59a498b0cae904857d11e0b5d835ad43327bfe923085b97fc79a56113c4a25f1d126f24ec611c555b7d6
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## [0.1.13] - 2019-10-28
|
4
|
+
__Zenlish__ can parse all sentences in lesson 1 and 2-A..2-E from
|
5
|
+
[Learn These Words First](http://learnthesewordsfirst.com/).
|
6
|
+
|
7
|
+
### Added
|
8
|
+
- Class `IndefiniteArticle` to represent the article `a (an)`.
|
9
|
+
|
10
|
+
### Changed
|
11
|
+
- File `zparser_spec.rb`: tests include all sentences from lesson 2-E (118 sentences in total).
|
12
|
+
- `ZenlishGrammar`: added production rules for adverbial and prepositional phrases put in front position.
|
13
|
+
- File `minimal_lexicon.rb`: new entries in lexicon `a`, `at`, `before`, `long`,
|
14
|
+
`short`, `move`
|
15
|
+
- File `README.md`: updated the metrics table
|
16
|
+
|
3
17
|
## [0.1.12] - 2019-10-26
|
4
18
|
__Zenlish__ can parse all sentences in lesson 1 and 2-A..2-D from
|
5
19
|
[Learn These Words First](http://learnthesewordsfirst.com/).
|
data/README.md
CHANGED
@@ -47,14 +47,14 @@ sentences of the first lesson.
|
|
47
47
|
|
48
48
|
The intent is to deliver gem versions in small increments.
|
49
49
|
|
50
|
-
#### Some project metrics (v. 0.1.
|
50
|
+
#### Some project metrics (v. 0.1.13)
|
51
51
|
|Metric|Value|
|
52
52
|
|:-:|:-:|
|
53
|
-
| Number of lemmas in lexicon |
|
54
|
-
| [Coverage 100 commonest English words](https://en.wikipedia.org/wiki/Most_common_words_in_English) |
|
55
|
-
| Number of production rules in grammar |
|
56
|
-
| Number of lessons covered |
|
57
|
-
| Number of sentences in spec files |
|
53
|
+
| Number of lemmas in lexicon | 77 |
|
54
|
+
| [Coverage 100 commonest English words](https://en.wikipedia.org/wiki/Most_common_words_in_English) | 39 |
|
55
|
+
| Number of production rules in grammar | 103 |
|
56
|
+
| Number of lessons covered | 13 |
|
57
|
+
| Number of sentences in spec files | 118 |
|
58
58
|
|
59
59
|
|
60
60
|
### Roadmap
|
@@ -62,7 +62,7 @@ The intent is to deliver gem versions in small increments.
|
|
62
62
|
Here is a tentative roadmap:
|
63
63
|
|
64
64
|
#### A) Ability to parse sentences from [Learn These Words First](http://learnthesewordsfirst.com/)
|
65
|
-
*STARTED*.
|
65
|
+
*STARTED*. 14% complete
|
66
66
|
This website advocates the idea of a multi-layered dictionary.
|
67
67
|
At the core, there are about 300 essential words.
|
68
68
|
The choice of these words is inspired by the semantic primitives of [NSM
|
@@ -25,6 +25,7 @@ module Zenlish
|
|
25
25
|
aLexicon.add_terminal(WClasses::CommonNoun.new.freeze)
|
26
26
|
aLexicon.add_terminal(WClasses::ComparativeParticle.new.freeze)
|
27
27
|
aLexicon.add_terminal(WClasses::DefiniteArticle.new.freeze)
|
28
|
+
aLexicon.add_terminal(WClasses::IndefiniteArticle.new.freeze)
|
28
29
|
aLexicon.add_terminal(WClasses::DegreeAdverb.new.freeze)
|
29
30
|
aLexicon.add_terminal(WClasses::DemonstrativeDeterminer.new.freeze)
|
30
31
|
aLexicon.add_terminal(WClasses::ConjunctivePronoun.new.freeze)
|
@@ -25,11 +25,15 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
25
25
|
rule 'dependent_clause' => 'simple_sentence'
|
26
26
|
rule 'dependent_clause' => 'DemonstrativePronoun'
|
27
27
|
rule 'affirmative_sentence' => 'noun_phrase verb_phrase'
|
28
|
+
rule 'affirmative_sentence' => 'propositional_phrase Comma noun_phrase verb_phrase'
|
28
29
|
rule 'affirmative_sentence' => 'AdverbThere IrregularVerbBe verb_be_complement'
|
29
30
|
rule 'affirmative_sentence' => 'numeral_of IrregularVerbBe verb_be_complement'
|
30
31
|
rule 'affirmative_sentence' => 'DemonstrativePronoun IrregularVerbBe verb_be_complement'
|
31
32
|
rule 'affirmative_sentence' => 'DemonstrativePronoun IrregularVerb verb_complement'
|
32
33
|
rule 'affirmative_sentence' => 'conjunctive_prefix IrregularVerbBe verb_be_complement'
|
34
|
+
|
35
|
+
# Case of time adjunct adverbial put in front position
|
36
|
+
rule 'affirmative_sentence' => 'simple_noun_phrase Adverb Comma noun_phrase verb_phrase'
|
33
37
|
rule 'negative_sentence' => 'noun_phrase negative_verb_phrase'
|
34
38
|
rule 'negative_sentence' => 'AdverbThere negative_verb_phrase'
|
35
39
|
rule 'negative_sentence' => 'numeral_of negative_verb_phrase'
|
@@ -68,6 +72,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
68
72
|
rule 'determiner' => 'single_determiner'
|
69
73
|
rule 'single_determiner' => 'DemonstrativeDeterminer'
|
70
74
|
rule 'single_determiner' => 'DefiniteArticle'
|
75
|
+
rule 'single_determiner' => 'IndefiniteArticle'
|
71
76
|
rule 'single_determiner' => 'IndefiniteQuantifier'
|
72
77
|
rule 'verb_phrase' => 'verb_group'
|
73
78
|
rule 'verb_phrase' => 'verb_group verb_complement'
|
@@ -75,10 +80,13 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
75
80
|
rule 'verb_phrase' => 'RegularVerbWant Preposition verb_group'
|
76
81
|
rule 'verb_phrase' => 'RegularVerbWant Preposition verb_group noun_phrase'
|
77
82
|
rule 'verb_phrase' => 'IrregularVerbDo DemonstrativePronoun'
|
83
|
+
rule 'verb_phrase' => 'IrregularVerbDo DemonstrativePronoun propositional_phrase'
|
78
84
|
rule 'verb_phrase' => 'ModalVerbCan verb_group DemonstrativePronoun'
|
79
85
|
rule 'verb_phrase' => 'IrregularVerbBe verb_be_complement'
|
80
86
|
rule 'verb_phrase' => 'IrregularVerbSay Colon Quote affirmative_sentence Period Quote'
|
81
87
|
rule 'verb_complement' => 'noun_phrase'
|
88
|
+
# perception verb (hear, see, watch, notice, ...): verb + object + infinitive
|
89
|
+
rule 'verb_complement' => 'simple_noun_phrase lexical_verb'
|
82
90
|
rule 'verb_complement' => 'Adjective propositional_phrase'
|
83
91
|
rule 'verb_complement' => 'noun_phrase IrregularVerbBe verb_be_complement'
|
84
92
|
rule 'verb_complement' => 'adverb_phrase'
|
data/lib/zenlish/version.rb
CHANGED
@@ -22,6 +22,7 @@ require_relative 'definite_article'
|
|
22
22
|
require_relative 'demonstrative_determiner'
|
23
23
|
require_relative 'conjunctive_pronoun'
|
24
24
|
require_relative 'demonstrative_pronoun'
|
25
|
+
require_relative 'indefinite_article'
|
25
26
|
require_relative 'indefinite_pronoun'
|
26
27
|
require_relative 'indefinite_quantifier'
|
27
28
|
require_relative 'linking_adverb'
|
@@ -30,6 +30,7 @@ module Zenlish
|
|
30
30
|
# In absence of a POS tagger/lemmatizer, we map input words
|
31
31
|
# to variables that themselves return Literal objects.
|
32
32
|
# For instance, next line will create a variable called 'alive'
|
33
|
+
literal2var('a', 'a', '_as_art')
|
33
34
|
literal2var('about', 'about')
|
34
35
|
literal2var('above', 'above')
|
35
36
|
literal2var('alive', 'alive')
|
@@ -37,8 +38,11 @@ module Zenlish
|
|
37
38
|
literal2var('another', 'another')
|
38
39
|
def are ; Lex::Literal.new('are', get_lexeme('be', WClasses::IrregularVerbBe), 0) ; end
|
39
40
|
literal2var('as', 'as')
|
41
|
+
literal2var('at', 'at')
|
40
42
|
literal2var('bad', 'bad')
|
41
43
|
literal2var('because', 'because')
|
44
|
+
def before_adverb ; Lex::Literal.new('before', get_lexeme('before', WClasses::Adverb), 0) ; end
|
45
|
+
def before ; Lex::Literal.new('before', get_lexeme('before', WClasses::SubordinatingConjunction), 0) ; end
|
42
46
|
literal2var('big', 'big')
|
43
47
|
literal2var('big', 'bigger')
|
44
48
|
literal2var('can', 'can')
|
@@ -67,8 +71,12 @@ module Zenlish
|
|
67
71
|
literal2var('like', 'like')
|
68
72
|
literal2var('Lisa', 'Lisa')
|
69
73
|
literal2var('living', 'living')
|
74
|
+
literal2var('long', 'long')
|
70
75
|
literal2var('many', 'many')
|
71
76
|
literal2var('more', 'more')
|
77
|
+
literal2var('move', 'move')
|
78
|
+
literal2var('move', 'moved')
|
79
|
+
literal2var('move', 'moves')
|
72
80
|
literal2var('near to', 'near_to')
|
73
81
|
literal2var('not', 'not', '_')
|
74
82
|
literal2var('on', 'on')
|
@@ -79,6 +87,7 @@ module Zenlish
|
|
79
87
|
literal2var('person', 'person')
|
80
88
|
literal2var('place', 'place')
|
81
89
|
literal2var('same', 'same')
|
90
|
+
literal2var('short', 'short')
|
82
91
|
literal2var('side', 'side')
|
83
92
|
literal2var('small', 'small')
|
84
93
|
literal2var('small', 'smaller')
|
@@ -99,6 +108,7 @@ module Zenlish
|
|
99
108
|
def this ; Lex::Literal.new('this', get_lexeme('this', WClasses::DemonstrativeDeterminer), 0) ; end
|
100
109
|
def this_as_pronoun ; Lex::Literal.new('this', get_lexeme('this', WClasses::DemonstrativePronoun), 0) ; end
|
101
110
|
literal2var('this one', 'this_one')
|
111
|
+
literal2var('time', 'time')
|
102
112
|
literal2var('to', 'to')
|
103
113
|
literal2var('Tony', 'Tony')
|
104
114
|
literal2var('touch', 'touching')
|
@@ -106,6 +116,7 @@ module Zenlish
|
|
106
116
|
literal2var('two', 'two')
|
107
117
|
literal2var('very', 'very')
|
108
118
|
literal2var('want', 'wants')
|
119
|
+
def was ; Lex::Literal.new('was', get_lexeme('be', WClasses::IrregularVerbBe), 0) ; end
|
109
120
|
literal2var('what', 'what')
|
110
121
|
literal2var('with', 'with')
|
111
122
|
literal2var('word', 'words')
|
@@ -585,8 +596,65 @@ module Zenlish
|
|
585
596
|
literals = [this_as_pronoun, feels, bad, for_, tony, dot]
|
586
597
|
expect { subject.parse(literals) }.not_to raise_error
|
587
598
|
end
|
599
|
+
|
600
|
+
it 'should parse sample sentences from lesson 2-E' do
|
601
|
+
# Sentence 2-14a: "Lisa says something at this time."
|
602
|
+
literals = [lisa, says, something, at, this, time, dot]
|
603
|
+
expect { subject.parse(literals) }.not_to raise_error
|
604
|
+
|
605
|
+
# Sentence 2-14b: "Tony is not in this place at this time."
|
606
|
+
literals = [tony, is, not_, in_, this, place, at, this, time, dot]
|
607
|
+
expect { subject.parse(literals) }.not_to raise_error
|
608
|
+
|
609
|
+
# Sentence 2-15a: "At one time, Tony does something to this thing."
|
610
|
+
literals = [at, one, time, comma, tony, does, something, to, this, thing, dot]
|
611
|
+
expect { subject.parse(literals) }.not_to raise_error
|
612
|
+
|
613
|
+
# Sentence 2-15b: "At another time, Lisa says something."
|
614
|
+
literals = [at, another, time, comma, lisa, says, something, dot]
|
615
|
+
expect { subject.parse(literals) }.not_to raise_error
|
616
|
+
|
617
|
+
# Sentence 2-15c: "Tony does something to this thing before Lisa says something."
|
618
|
+
literals = [tony, does, something, to, this, thing, before, lisa, says, something, dot]
|
619
|
+
expect { subject.parse(literals) }.not_to raise_error
|
620
|
+
|
621
|
+
# Sentence 2-16a: "Lisa does something for a long time."
|
622
|
+
literals = [lisa, does, something, for_, a_as_art, long, time, dot]
|
623
|
+
expect { subject.parse(literals) }.not_to raise_error
|
624
|
+
|
625
|
+
# Sentence 2-17a: "Tony does something for a short time."
|
626
|
+
literals = [tony, does, something, for_, a_as_art, short, time, dot]
|
627
|
+
expect { subject.parse(literals) }.not_to raise_error
|
628
|
+
|
629
|
+
# Sentence 2-17a: "Tony does not do this for a long time."
|
630
|
+
literals = [tony, does_aux, not_, do_, this_as_pronoun, for_, a_as_art, long, time, dot]
|
631
|
+
expect { subject.parse(literals) }.not_to raise_error
|
632
|
+
|
633
|
+
# Sentence 2-18a: "Lisa sees something move."
|
634
|
+
literals = [lisa, sees, something, move, dot]
|
635
|
+
expect { subject.parse(literals) }.not_to raise_error
|
636
|
+
|
637
|
+
# Sentence 2-18a: "Lisa moves near to this thing."
|
638
|
+
literals = [lisa, moves, near_to, this, thing, dot]
|
639
|
+
expect { subject.parse(literals) }.not_to raise_error
|
640
|
+
|
641
|
+
# Sentence 2-E-Xa: "A short time before, Tony was far from Lisa."
|
642
|
+
# Case of a time adverbial adjunct that is put in front position.
|
643
|
+
literals = [a_as_art, short, time, before_adverb, comma, tony, was, far, from, lisa, dot]
|
644
|
+
expect { subject.parse(literals) }.not_to raise_error
|
645
|
+
|
646
|
+
# Sentence 2-E-Xa: "At this time, Tony is near to Lisa"
|
647
|
+
literals = [at, this, time, comma, tony, is, near_to, lisa, dot]
|
648
|
+
expect { subject.parse(literals) }.not_to raise_error
|
649
|
+
|
650
|
+
# Sentence 2-E-Xa: "Tony is near to Lisa because Tony moved"
|
651
|
+
# Case of a main clause followed by a subordinate clause introduced by 'because'.
|
652
|
+
literals = [tony, is, near_to, lisa, because, tony, moved, dot]
|
653
|
+
expect { subject.parse(literals) }.not_to raise_error
|
654
|
+
end
|
588
655
|
=begin
|
589
656
|
TODO
|
657
|
+
|
590
658
|
Lesson 2.A
|
591
659
|
Xtra:
|
592
660
|
What Tony has is like what Lisa has.
|
@@ -30,6 +30,7 @@ conjunctive_pronoun = $ZenlishLexicon.name2terminal['ConjunctivePronoun']
|
|
30
30
|
demonstrative_pronoun = $ZenlishLexicon.name2terminal['DemonstrativePronoun']
|
31
31
|
demonstrative_determiner = $ZenlishLexicon.name2terminal['DemonstrativeDeterminer']
|
32
32
|
definite_article = $ZenlishLexicon.name2terminal['DefiniteArticle']
|
33
|
+
indefinite_article = $ZenlishLexicon.name2terminal['IndefiniteArticle']
|
33
34
|
cardinal = $ZenlishLexicon.name2terminal['Cardinal']
|
34
35
|
comparative_particle = $ZenlishLexicon.name2terminal['ComparativeParticle']
|
35
36
|
indefinite_quantifier = $ZenlishLexicon.name2terminal['IndefiniteQuantifier']
|
@@ -49,16 +50,20 @@ def add_entry(aLemma, aWordClass)
|
|
49
50
|
end
|
50
51
|
|
51
52
|
# Our minimalistic lexicon
|
53
|
+
add_entry('a', indefinite_article)
|
52
54
|
add_entry('about', preposition)
|
53
55
|
add_entry('above', preposition)
|
54
56
|
add_entry('alive', adjective)
|
55
57
|
add_entry('all', indefinite_quantifier)
|
56
58
|
add_entry('another', adjective)
|
57
59
|
add_entry('as', comparative_particle)
|
60
|
+
add_entry('at', preposition)
|
58
61
|
add_entry('bad', adjective)
|
59
62
|
add_entry('be', auxiliary_be)
|
60
63
|
add_entry('be', irregular_verb_be)
|
61
64
|
add_entry('because', subordinating_conjunction)
|
65
|
+
add_entry('before', adverb)
|
66
|
+
add_entry('before', subordinating_conjunction)
|
62
67
|
add_entry('big', adjective)
|
63
68
|
add_entry('can', modal_verb_can)
|
64
69
|
add_entry('do', auxiliary_do)
|
@@ -80,8 +85,10 @@ add_entry('know', irregular_verb)
|
|
80
85
|
add_entry('like', preposition)
|
81
86
|
add_entry('Lisa', proper_noun)
|
82
87
|
add_entry('living', adjective)
|
88
|
+
add_entry('long', adjective)
|
83
89
|
add_entry('many', indefinite_quantifier)
|
84
90
|
add_entry('more', adjective)
|
91
|
+
add_entry('move', regular_verb)
|
85
92
|
add_entry('near to', preposition)
|
86
93
|
add_entry('not', adverb_not)
|
87
94
|
add_entry('of', preposition_of)
|
@@ -95,6 +102,7 @@ add_entry('place', common_noun)
|
|
95
102
|
add_entry('same', adjective)
|
96
103
|
add_entry('say', irregular_verb_say)
|
97
104
|
add_entry('see', irregular_verb)
|
105
|
+
add_entry('short', adjective)
|
98
106
|
add_entry('side', common_noun)
|
99
107
|
add_entry('small', adjective)
|
100
108
|
add_entry('some', indefinite_quantifier)
|
@@ -108,6 +116,7 @@ add_entry('think', irregular_verb)
|
|
108
116
|
add_entry('this', demonstrative_determiner)
|
109
117
|
add_entry('this', demonstrative_pronoun)
|
110
118
|
add_entry('this one', demonstrative_pronoun)
|
119
|
+
add_entry('time', common_noun)
|
111
120
|
add_entry('to', preposition)
|
112
121
|
add_entry('Tony', proper_noun)
|
113
122
|
add_entry('touch', regular_verb)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zenlish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|
@@ -113,6 +113,7 @@ files:
|
|
113
113
|
- lib/zenlish/wclasses/demonstrative_determiner.rb
|
114
114
|
- lib/zenlish/wclasses/demonstrative_pronoun.rb
|
115
115
|
- lib/zenlish/wclasses/determiner.rb
|
116
|
+
- lib/zenlish/wclasses/indefinite_article.rb
|
116
117
|
- lib/zenlish/wclasses/indefinite_pronoun.rb
|
117
118
|
- lib/zenlish/wclasses/indefinite_quantifier.rb
|
118
119
|
- lib/zenlish/wclasses/irregular_verb.rb
|