zenlish 0.1.12 → 0.1.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +7 -7
- data/lib/zenlish/lex/empty_lexicon_factory.rb +1 -0
- data/lib/zenlish/parser/zenlish_grammar.rb +8 -0
- data/lib/zenlish/version.rb +1 -1
- data/lib/zenlish/wclasses/all_word_classes.rb +1 -0
- data/lib/zenlish/wclasses/indefinite_article.rb +9 -0
- data/spec/zenlish/parser/zparser_spec.rb +68 -0
- data/spec/zenlish/support/minimal_lexicon.rb +9 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e6f916af0c3deb8ecbc0f46cf0ed8638d93e7ce33091070ae6549ca2a29d2b58
|
4
|
+
data.tar.gz: 07b3103e32b1d53afd46492544592d139af39af2730290985c429cc82bcaf34a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52030f24b26543dc6ad52abe9a00ce88407e4e7fb1e6201e9c5533b7f7718e1cb66af8d8281c7a4224c12bd6db7a1825f81b97899f9efaaeef560911671632f1
|
7
|
+
data.tar.gz: 851109870bf5ecfcc65f0385c88c2aa5546a8cd3bbab59a498b0cae904857d11e0b5d835ad43327bfe923085b97fc79a56113c4a25f1d126f24ec611c555b7d6
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## [0.1.13] - 2019-10-28
|
4
|
+
__Zenlish__ can parse all sentences in lesson 1 and 2-A..2-E from
|
5
|
+
[Learn These Words First](http://learnthesewordsfirst.com/).
|
6
|
+
|
7
|
+
### Added
|
8
|
+
- Class `IndefiniteArticle` to represent the article `a (an)`.
|
9
|
+
|
10
|
+
### Changed
|
11
|
+
- File `zparser_spec.rb`: tests include all sentences from lesson 2-E (118 sentences in total).
|
12
|
+
- `ZenlishGrammar`: added production rules for adverbial and prepositional phrases put in front position.
|
13
|
+
- File `minimal_lexicon.rb`: new entries in lexicon `a`, `at`, `before`, `long`,
|
14
|
+
`short`, `move`, `time`
|
15
|
+
- File `README.md`: updated the metrics table.
|
16
|
+
|
3
17
|
## [0.1.12] - 2019-10-26
|
4
18
|
__Zenlish__ can parse all sentences in lesson 1 and 2-A..2-D from
|
5
19
|
[Learn These Words First](http://learnthesewordsfirst.com/).
|
data/README.md
CHANGED
@@ -47,14 +47,14 @@ sentences of the first lesson.
|
|
47
47
|
|
48
48
|
The intent is to deliver gem versions in small increments.
|
49
49
|
|
50
|
-
#### Some project metrics (v. 0.1.12)
|
50
|
+
#### Some project metrics (v. 0.1.13)
|
51
51
|
|Metric|Value|
|
52
52
|
|:-:|:-:|
|
53
|
-
| Number of lemmas in lexicon |
|
54
|
-
| [Coverage 100 commonest English words](https://en.wikipedia.org/wiki/Most_common_words_in_English) |
|
55
|
-
| Number of production rules in grammar |
|
56
|
-
| Number of lessons covered |
|
57
|
-
| Number of sentences in spec files |
|
53
|
+
| Number of lemmas in lexicon | 77 |
|
54
|
+
| [Coverage 100 commonest English words](https://en.wikipedia.org/wiki/Most_common_words_in_English) | 39 |
|
55
|
+
| Number of production rules in grammar | 103 |
|
56
|
+
| Number of lessons covered | 13 |
|
57
|
+
| Number of sentences in spec files | 118 |
|
58
58
|
|
59
59
|
|
60
60
|
### Roadmap
|
@@ -62,7 +62,7 @@ The intent is to deliver gem versions in small increments.
|
|
62
62
|
Here a tentative roadmap:
|
63
63
|
|
64
64
|
#### A) Ability to parse sentences from [Learn These Words First](http://learnthesewordsfirst.com/)
|
65
|
-
*STARTED*.
|
65
|
+
*STARTED*. 14% complete
|
66
66
|
This website advocates the idea of a multi-layered dictionary.
|
67
67
|
At the core, there are about 300 essential words.
|
68
68
|
The choice of these words is inspired by the semantic primitives of [NSM
|
@@ -25,6 +25,7 @@ module Zenlish
|
|
25
25
|
aLexicon.add_terminal(WClasses::CommonNoun.new.freeze)
|
26
26
|
aLexicon.add_terminal(WClasses::ComparativeParticle.new.freeze)
|
27
27
|
aLexicon.add_terminal(WClasses::DefiniteArticle.new.freeze)
|
28
|
+
aLexicon.add_terminal(WClasses::IndefiniteArticle.new.freeze)
|
28
29
|
aLexicon.add_terminal(WClasses::DegreeAdverb.new.freeze)
|
29
30
|
aLexicon.add_terminal(WClasses::DemonstrativeDeterminer.new.freeze)
|
30
31
|
aLexicon.add_terminal(WClasses::ConjunctivePronoun.new.freeze)
|
@@ -25,11 +25,15 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
25
25
|
rule 'dependent_clause' => 'simple_sentence'
|
26
26
|
rule 'dependent_clause' => 'DemonstrativePronoun'
|
27
27
|
rule 'affirmative_sentence' => 'noun_phrase verb_phrase'
|
28
|
+
rule 'affirmative_sentence' => 'propositional_phrase Comma noun_phrase verb_phrase'
|
28
29
|
rule 'affirmative_sentence' => 'AdverbThere IrregularVerbBe verb_be_complement'
|
29
30
|
rule 'affirmative_sentence' => 'numeral_of IrregularVerbBe verb_be_complement'
|
30
31
|
rule 'affirmative_sentence' => 'DemonstrativePronoun IrregularVerbBe verb_be_complement'
|
31
32
|
rule 'affirmative_sentence' => 'DemonstrativePronoun IrregularVerb verb_complement'
|
32
33
|
rule 'affirmative_sentence' => 'conjunctive_prefix IrregularVerbBe verb_be_complement'
|
34
|
+
|
35
|
+
# Case of a time adjunct adverbial put in front position
|
36
|
+
rule 'affirmative_sentence' => 'simple_noun_phrase Adverb Comma noun_phrase verb_phrase'
|
33
37
|
rule 'negative_sentence' => 'noun_phrase negative_verb_phrase'
|
34
38
|
rule 'negative_sentence' => 'AdverbThere negative_verb_phrase'
|
35
39
|
rule 'negative_sentence' => 'numeral_of negative_verb_phrase'
|
@@ -68,6 +72,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
68
72
|
rule 'determiner' => 'single_determiner'
|
69
73
|
rule 'single_determiner' => 'DemonstrativeDeterminer'
|
70
74
|
rule 'single_determiner' => 'DefiniteArticle'
|
75
|
+
rule 'single_determiner' => 'IndefiniteArticle'
|
71
76
|
rule 'single_determiner' => 'IndefiniteQuantifier'
|
72
77
|
rule 'verb_phrase' => 'verb_group'
|
73
78
|
rule 'verb_phrase' => 'verb_group verb_complement'
|
@@ -75,10 +80,13 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
75
80
|
rule 'verb_phrase' => 'RegularVerbWant Preposition verb_group'
|
76
81
|
rule 'verb_phrase' => 'RegularVerbWant Preposition verb_group noun_phrase'
|
77
82
|
rule 'verb_phrase' => 'IrregularVerbDo DemonstrativePronoun'
|
83
|
+
rule 'verb_phrase' => 'IrregularVerbDo DemonstrativePronoun propositional_phrase'
|
78
84
|
rule 'verb_phrase' => 'ModalVerbCan verb_group DemonstrativePronoun'
|
79
85
|
rule 'verb_phrase' => 'IrregularVerbBe verb_be_complement'
|
80
86
|
rule 'verb_phrase' => 'IrregularVerbSay Colon Quote affirmative_sentence Period Quote'
|
81
87
|
rule 'verb_complement' => 'noun_phrase'
|
88
|
+
# perception verb (hear, see, watch, notice, ...): verb + object + infinitive
|
89
|
+
rule 'verb_complement' => 'simple_noun_phrase lexical_verb'
|
82
90
|
rule 'verb_complement' => 'Adjective propositional_phrase'
|
83
91
|
rule 'verb_complement' => 'noun_phrase IrregularVerbBe verb_be_complement'
|
84
92
|
rule 'verb_complement' => 'adverb_phrase'
|
data/lib/zenlish/version.rb
CHANGED
@@ -22,6 +22,7 @@ require_relative 'definite_article'
|
|
22
22
|
require_relative 'demonstrative_determiner'
|
23
23
|
require_relative 'conjunctive_pronoun'
|
24
24
|
require_relative 'demonstrative_pronoun'
|
25
|
+
require_relative 'indefinite_article'
|
25
26
|
require_relative 'indefinite_pronoun'
|
26
27
|
require_relative 'indefinite_quantifier'
|
27
28
|
require_relative 'linking_adverb'
|
@@ -30,6 +30,7 @@ module Zenlish
|
|
30
30
|
# In absence of a POS tagger/lemmatizer, we map input words
|
31
31
|
# to variables that themselves return Literal objects.
|
32
32
|
# For instance, next line will create a variable called 'alive'
|
33
|
+
literal2var('a', 'a', '_as_art')
|
33
34
|
literal2var('about', 'about')
|
34
35
|
literal2var('above', 'above')
|
35
36
|
literal2var('alive', 'alive')
|
@@ -37,8 +38,11 @@ module Zenlish
|
|
37
38
|
literal2var('another', 'another')
|
38
39
|
def are ; Lex::Literal.new('are', get_lexeme('be', WClasses::IrregularVerbBe), 0) ; end
|
39
40
|
literal2var('as', 'as')
|
41
|
+
literal2var('at', 'at')
|
40
42
|
literal2var('bad', 'bad')
|
41
43
|
literal2var('because', 'because')
|
44
|
+
def before_adverb ; Lex::Literal.new('before', get_lexeme('before', WClasses::Adverb), 0) ; end
|
45
|
+
def before ; Lex::Literal.new('before', get_lexeme('before', WClasses::SubordinatingConjunction), 0) ; end
|
42
46
|
literal2var('big', 'big')
|
43
47
|
literal2var('big', 'bigger')
|
44
48
|
literal2var('can', 'can')
|
@@ -67,8 +71,12 @@ module Zenlish
|
|
67
71
|
literal2var('like', 'like')
|
68
72
|
literal2var('Lisa', 'Lisa')
|
69
73
|
literal2var('living', 'living')
|
74
|
+
literal2var('long', 'long')
|
70
75
|
literal2var('many', 'many')
|
71
76
|
literal2var('more', 'more')
|
77
|
+
literal2var('move', 'move')
|
78
|
+
literal2var('move', 'moved')
|
79
|
+
literal2var('move', 'moves')
|
72
80
|
literal2var('near to', 'near_to')
|
73
81
|
literal2var('not', 'not', '_')
|
74
82
|
literal2var('on', 'on')
|
@@ -79,6 +87,7 @@ module Zenlish
|
|
79
87
|
literal2var('person', 'person')
|
80
88
|
literal2var('place', 'place')
|
81
89
|
literal2var('same', 'same')
|
90
|
+
literal2var('short', 'short')
|
82
91
|
literal2var('side', 'side')
|
83
92
|
literal2var('small', 'small')
|
84
93
|
literal2var('small', 'smaller')
|
@@ -99,6 +108,7 @@ module Zenlish
|
|
99
108
|
def this ; Lex::Literal.new('this', get_lexeme('this', WClasses::DemonstrativeDeterminer), 0) ; end
|
100
109
|
def this_as_pronoun ; Lex::Literal.new('this', get_lexeme('this', WClasses::DemonstrativePronoun), 0) ; end
|
101
110
|
literal2var('this one', 'this_one')
|
111
|
+
literal2var('time', 'time')
|
102
112
|
literal2var('to', 'to')
|
103
113
|
literal2var('Tony', 'Tony')
|
104
114
|
literal2var('touch', 'touching')
|
@@ -106,6 +116,7 @@ module Zenlish
|
|
106
116
|
literal2var('two', 'two')
|
107
117
|
literal2var('very', 'very')
|
108
118
|
literal2var('want', 'wants')
|
119
|
+
def was ; Lex::Literal.new('was', get_lexeme('be', WClasses::IrregularVerbBe), 0) ; end
|
109
120
|
literal2var('what', 'what')
|
110
121
|
literal2var('with', 'with')
|
111
122
|
literal2var('word', 'words')
|
@@ -585,8 +596,65 @@ module Zenlish
|
|
585
596
|
literals = [this_as_pronoun, feels, bad, for_, tony, dot]
|
586
597
|
expect { subject.parse(literals) }.not_to raise_error
|
587
598
|
end
|
599
|
+
|
600
|
+
it 'should parse sample sentences from lesson 2-E' do
|
601
|
+
# Sentence 2-14a: "Lisa says something at this time."
|
602
|
+
literals = [lisa, says, something, at, this, time, dot]
|
603
|
+
expect { subject.parse(literals) }.not_to raise_error
|
604
|
+
|
605
|
+
# Sentence 2-14b: "Tony is not in this place at this time."
|
606
|
+
literals = [tony, is, not_, in_, this, place, at, this, time, dot]
|
607
|
+
expect { subject.parse(literals) }.not_to raise_error
|
608
|
+
|
609
|
+
# Sentence 2-15a: "At one time, Tony does something to this thing."
|
610
|
+
literals = [at, one, time, comma, tony, does, something, to, this, thing, dot]
|
611
|
+
expect { subject.parse(literals) }.not_to raise_error
|
612
|
+
|
613
|
+
# Sentence 2-15b: "At another time, Lisa says something."
|
614
|
+
literals = [at, another, time, comma, lisa, says, something, dot]
|
615
|
+
expect { subject.parse(literals) }.not_to raise_error
|
616
|
+
|
617
|
+
# Sentence 2-15c: "Tony does something to this thing before Lisa says something."
|
618
|
+
literals = [tony, does, something, to, this, thing, before, lisa, says, something, dot]
|
619
|
+
expect { subject.parse(literals) }.not_to raise_error
|
620
|
+
|
621
|
+
# Sentence 2-16a: "Lisa does something for a long time."
|
622
|
+
literals = [lisa, does, something, for_, a_as_art, long, time, dot]
|
623
|
+
expect { subject.parse(literals) }.not_to raise_error
|
624
|
+
|
625
|
+
# Sentence 2-17a: "Tony does something for a short time."
|
626
|
+
literals = [tony, does, something, for_, a_as_art, short, time, dot]
|
627
|
+
expect { subject.parse(literals) }.not_to raise_error
|
628
|
+
|
629
|
+
# Sentence 2-17b: "Tony does not do this for a long time."
|
630
|
+
literals = [tony, does_aux, not_, do_, this_as_pronoun, for_, a_as_art, long, time, dot]
|
631
|
+
expect { subject.parse(literals) }.not_to raise_error
|
632
|
+
|
633
|
+
# Sentence 2-18a: "Lisa sees something move."
|
634
|
+
literals = [lisa, sees, something, move, dot]
|
635
|
+
expect { subject.parse(literals) }.not_to raise_error
|
636
|
+
|
637
|
+
# Sentence 2-18b: "Lisa moves near to this thing."
|
638
|
+
literals = [lisa, moves, near_to, this, thing, dot]
|
639
|
+
expect { subject.parse(literals) }.not_to raise_error
|
640
|
+
|
641
|
+
# Sentence 2-E-Xa: "A short time before, Tony was far from Lisa."
|
642
|
+
# Case of a time adverbial adjunct that is put in front position.
|
643
|
+
literals = [a_as_art, short, time, before_adverb, comma, tony, was, far, from, lisa, dot]
|
644
|
+
expect { subject.parse(literals) }.not_to raise_error
|
645
|
+
|
646
|
+
# Sentence 2-E-Xb: "At this time, Tony is near to Lisa."
|
647
|
+
literals = [at, this, time, comma, tony, is, near_to, lisa, dot]
|
648
|
+
expect { subject.parse(literals) }.not_to raise_error
|
649
|
+
|
650
|
+
# Sentence 2-E-Xc: "Tony is near to Lisa because Tony moved."
|
651
|
+
# Case of a main clause followed by a subordinate clause introduced by 'because'.
|
652
|
+
literals = [tony, is, near_to, lisa, because, tony, moved, dot]
|
653
|
+
expect { subject.parse(literals) }.not_to raise_error
|
654
|
+
end
|
588
655
|
=begin
|
589
656
|
TODO
|
657
|
+
|
590
658
|
Lesson 2.A
|
591
659
|
Xtra:
|
592
660
|
What Tony has is like what Lisa has.
|
@@ -30,6 +30,7 @@ conjunctive_pronoun = $ZenlishLexicon.name2terminal['ConjunctivePronoun']
|
|
30
30
|
demonstrative_pronoun = $ZenlishLexicon.name2terminal['DemonstrativePronoun']
|
31
31
|
demonstrative_determiner = $ZenlishLexicon.name2terminal['DemonstrativeDeterminer']
|
32
32
|
definite_article = $ZenlishLexicon.name2terminal['DefiniteArticle']
|
33
|
+
indefinite_article = $ZenlishLexicon.name2terminal['IndefiniteArticle']
|
33
34
|
cardinal = $ZenlishLexicon.name2terminal['Cardinal']
|
34
35
|
comparative_particle = $ZenlishLexicon.name2terminal['ComparativeParticle']
|
35
36
|
indefinite_quantifier = $ZenlishLexicon.name2terminal['IndefiniteQuantifier']
|
@@ -49,16 +50,20 @@ def add_entry(aLemma, aWordClass)
|
|
49
50
|
end
|
50
51
|
|
51
52
|
# Our minimalistic lexicon
|
53
|
+
add_entry('a', indefinite_article)
|
52
54
|
add_entry('about', preposition)
|
53
55
|
add_entry('above', preposition)
|
54
56
|
add_entry('alive', adjective)
|
55
57
|
add_entry('all', indefinite_quantifier)
|
56
58
|
add_entry('another', adjective)
|
57
59
|
add_entry('as', comparative_particle)
|
60
|
+
add_entry('at', preposition)
|
58
61
|
add_entry('bad', adjective)
|
59
62
|
add_entry('be', auxiliary_be)
|
60
63
|
add_entry('be', irregular_verb_be)
|
61
64
|
add_entry('because', subordinating_conjunction)
|
65
|
+
add_entry('before', adverb)
|
66
|
+
add_entry('before', subordinating_conjunction)
|
62
67
|
add_entry('big', adjective)
|
63
68
|
add_entry('can', modal_verb_can)
|
64
69
|
add_entry('do', auxiliary_do)
|
@@ -80,8 +85,10 @@ add_entry('know', irregular_verb)
|
|
80
85
|
add_entry('like', preposition)
|
81
86
|
add_entry('Lisa', proper_noun)
|
82
87
|
add_entry('living', adjective)
|
88
|
+
add_entry('long', adjective)
|
83
89
|
add_entry('many', indefinite_quantifier)
|
84
90
|
add_entry('more', adjective)
|
91
|
+
add_entry('move', regular_verb)
|
85
92
|
add_entry('near to', preposition)
|
86
93
|
add_entry('not', adverb_not)
|
87
94
|
add_entry('of', preposition_of)
|
@@ -95,6 +102,7 @@ add_entry('place', common_noun)
|
|
95
102
|
add_entry('same', adjective)
|
96
103
|
add_entry('say', irregular_verb_say)
|
97
104
|
add_entry('see', irregular_verb)
|
105
|
+
add_entry('short', adjective)
|
98
106
|
add_entry('side', common_noun)
|
99
107
|
add_entry('small', adjective)
|
100
108
|
add_entry('some', indefinite_quantifier)
|
@@ -108,6 +116,7 @@ add_entry('think', irregular_verb)
|
|
108
116
|
add_entry('this', demonstrative_determiner)
|
109
117
|
add_entry('this', demonstrative_pronoun)
|
110
118
|
add_entry('this one', demonstrative_pronoun)
|
119
|
+
add_entry('time', common_noun)
|
111
120
|
add_entry('to', preposition)
|
112
121
|
add_entry('Tony', proper_noun)
|
113
122
|
add_entry('touch', regular_verb)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zenlish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.12
|
4
|
+
version: 0.1.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|
@@ -113,6 +113,7 @@ files:
|
|
113
113
|
- lib/zenlish/wclasses/demonstrative_determiner.rb
|
114
114
|
- lib/zenlish/wclasses/demonstrative_pronoun.rb
|
115
115
|
- lib/zenlish/wclasses/determiner.rb
|
116
|
+
- lib/zenlish/wclasses/indefinite_article.rb
|
116
117
|
- lib/zenlish/wclasses/indefinite_pronoun.rb
|
117
118
|
- lib/zenlish/wclasses/indefinite_quantifier.rb
|
118
119
|
- lib/zenlish/wclasses/irregular_verb.rb
|