zenlish 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.yardopts +6 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +95 -0
- data/Rakefile +6 -0
- data/lib/zenlish.rb +9 -0
- data/lib/zenlish/lex/empty_lexicon.rb +7 -0
- data/lib/zenlish/lex/empty_lexicon_factory.rb +32 -0
- data/lib/zenlish/lex/lexeme.rb +19 -0
- data/lib/zenlish/lex/lexical_entry.rb +19 -0
- data/lib/zenlish/lex/lexicon.rb +55 -0
- data/lib/zenlish/lex/literal.rb +16 -0
- data/lib/zenlish/parser/zenlish_grammar.rb +29 -0
- data/lib/zenlish/parser/zparser.rb +30 -0
- data/lib/zenlish/version.rb +3 -0
- data/lib/zenlish/wclasses/adjective.rb +9 -0
- data/lib/zenlish/wclasses/all_word_classes.rb +11 -0
- data/lib/zenlish/wclasses/article.rb +9 -0
- data/lib/zenlish/wclasses/common_noun.rb +9 -0
- data/lib/zenlish/wclasses/definite_article.rb +9 -0
- data/lib/zenlish/wclasses/demonstrative_determiner.rb +9 -0
- data/lib/zenlish/wclasses/determiner.rb +9 -0
- data/lib/zenlish/wclasses/indefinite_pronoun.rb +9 -0
- data/lib/zenlish/wclasses/irregular_verb.rb +9 -0
- data/lib/zenlish/wclasses/lexical_verb.rb +9 -0
- data/lib/zenlish/wclasses/noun.rb +11 -0
- data/lib/zenlish/wclasses/pronoun.rb +9 -0
- data/lib/zenlish/wclasses/proper_noun.rb +10 -0
- data/lib/zenlish/wclasses/test_hierarchy.rb +3 -0
- data/lib/zenlish/wclasses/verb.rb +9 -0
- data/lib/zenlish/wclasses/word_class.rb +20 -0
- data/spec/spec_helper.rb +12 -0
- data/spec/zenlish/lex/empty_lexicon_factory_spec.rb +35 -0
- data/spec/zenlish/lex/lexeme_spec.rb +39 -0
- data/spec/zenlish/lex/lexical_entry_spec.rb +46 -0
- data/spec/zenlish/lex/lexicon_spec.rb +104 -0
- data/spec/zenlish/lex/literal_spec.rb +41 -0
- data/spec/zenlish/parser/zenlish_grammar_spec.rb +21 -0
- data/spec/zenlish/parser/zparser_spec.rb +86 -0
- data/spec/zenlish/support/minimal_lexicon.rb +30 -0
- data/spec/zenlish/wclasses/common_noun_spec.rb +22 -0
- data/spec/zenlish/wclasses/irregular_verb_spec.rb +21 -0
- data/spec/zenlish/wclasses/proper_noun_spec.rb +21 -0
- data/spec/zenlish_spec.rb +5 -0
- data/zenlish.gemspec +61 -0
- metadata +158 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 026405b993a2736b1e5c5341308aa1ffab8e77ba07dfc38d2e2344e183c466a7
|
4
|
+
data.tar.gz: 795d56d6e9b9c5204678912833c5817d04fef64e56e2128ac6ad6962b349b676
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1bc6680bbd7f6da39d795903c4ec9b67061bfbbc409289669f37a3fa97d50f176bc634f0b6c10a6f2de54b81e5595d38fbddad0dffb9a97b555c2bd83eb68d86
|
7
|
+
data.tar.gz: a3772e8100491357a0776a4a83cf039ca17d7ba01792284ad4e92d96c4666b2394815199e9681853f4b796cea6f533cab81da65b6b6756b79a5559183f22d2db
|
data/.rspec
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 TODO: Write your name
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
# Zenlish
|
2
|
+
|
3
|
+
### What is __Zenlish__ ?
|
4
|
+
|
5
|
+
Zenlish = Zen + English
|
6
|
+
|
7
|
+
|
8
|
+
The goal of this project is to implement a toolkit for a subset of the English language, called ... Zenlish.
|
9
|
+
A [Controlled Natural Language](https://en.wikipedia.org/wiki/Controlled_natural_language) defines a subset of a natural language -English, for instance- with a restricted syntax and restricted semantics.
|
10
|
+
|
11
|
+
### Design of Zenlish language
|
12
|
+
#### Minimalism
|
13
|
+
The name of the language is a combination of 'Zen' and 'English'.
|
14
|
+
It reflects a desire to make Zenlish a simple language:
|
15
|
+
- The focus is put on a simplified syntax,
|
16
|
+
- A limited lexicon. Priority on most commonly used words.
|
17
|
+
|
18
|
+
#### Expressiveness
|
19
|
+
Zenlish should be rich enough to express ideas and facts in a fluid way (vs. a contrived, artificial way). Litmus test: a Zenlish text should be easy to read for an English-speaking person.
|
20
|
+
|
21
|
+
### Zenlish as a library (gem)
|
22
|
+
Over time, the zenlish gem will contain:
|
23
|
+
- A tokenizer (tagging, lemmatizer)
|
24
|
+
- A lexicon [STARTED]
|
25
|
+
- A context-free grammar [STARTED]
|
26
|
+
- A parser [STARTED]
|
27
|
+
- Feature unification (for number, gender agreement)
|
28
|
+
- A simplified ontology
|
29
|
+
|
30
|
+
### What is the purpose of __Zenlish__ ?
|
31
|
+
With __Zenlish__ it should be possible for a Ruby application to interact with
|
32
|
+
users with a language that is close enough to English.
|
33
|
+
|
34
|
+
### Roadmap
|
35
|
+
The project is still in inception.
|
36
|
+
Here is a tentative roadmap:
|
37
|
+
|
38
|
+
#### A) Support vocabulary and sentences from [Learn These Words First](http://learnthesewordsfirst.com/)
|
39
|
+
This website advocates the idea of a multi-layered dictionary.
|
40
|
+
At the core, there are about 300 essential words.
|
41
|
+
The choice of these words is inspired by the semantic primitives of [NSM
|
42
|
+
(Natural Semantic Metalanguage)](https://en.wikipedia.org/wiki/Natural_semantic_metalanguage).
|
43
|
+
The essential words are introduced in twelve lessons. Each lesson puts the words
|
44
|
+
in exemplar sentences and pictures.
|
45
|
+
|
46
|
+
The project sub-goals are:
|
47
|
+
- To inject the 300 core words into Zenlish lexicon,
|
48
|
+
- Zenlish should be able to parse all the example sentences
|
49
|
+
- Also Zenlish should determine the semantics (i.e. meaning) of the sentences
|
50
|
+
|
51
|
+
#### B) Capability to read a complete book
|
52
|
+
A good candidate book is "The Edge of the Sky" by Roberto Trotta (ISBN 978-0-465-04471-9 : hardcover, ISBN 978-0-465-04490-0 : ebook).
|
53
|
+
Professor Trotta challenged himself by writing a book on Cosmology with the 1000 most used words. More details [here](http://robertotrotta.com/the-edge-of-the-sky/).
|
54
|
+
|
55
|
+
In order to achieve this goal, Zenlish should:
|
56
|
+
- Incorporate the 1000 words in its lexicon
|
57
|
+
- Have a grammar that allows the parsing of the sentences in the book.
|
58
|
+
|
59
|
+
#### C) Capability to interpret the meaning of a complete book
|
60
|
+
Probably far-fetched. But it would be nice to send queries to Zenlish to check if
|
61
|
+
it has some understanding of the text it reads (i.e. has a semantic representation).
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
## Installation
|
66
|
+
|
67
|
+
Add this line to your application's Gemfile:
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
gem 'zenlish'
|
71
|
+
```
|
72
|
+
|
73
|
+
And then execute:
|
74
|
+
|
75
|
+
$ bundle
|
76
|
+
|
77
|
+
Or install it yourself as:
|
78
|
+
|
79
|
+
$ gem install zenlish
|
80
|
+
|
81
|
+
## Usage
|
82
|
+
|
83
|
+
TODO: Write usage instructions here
|
84
|
+
|
85
|
+
## Contributing
|
86
|
+
|
87
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/famished-tiger/Zenlish. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
88
|
+
|
89
|
+
## License
|
90
|
+
|
91
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
92
|
+
|
93
|
+
## Code of Conduct
|
94
|
+
|
95
|
+
Everyone interacting in the Zenlish project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/famished-tiger/Zenlish/blob/master/CODE_OF_CONDUCT.md).
|
data/Rakefile
ADDED
data/lib/zenlish.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require_relative '../wclasses/all_word_classes'
|
2
|
+
require_relative 'lexicon'
|
3
|
+
|
4
|
+
module Zenlish
  module Lex
    # Mixin that builds a lexicon holding only terminal symbols
    # (word classes and punctuation) and no lexical entry yet.
    module EmptyLexiconFactory
      # Build a new lexicon and register the terminal symbols in it.
      # @return [Lexicon] the freshly created lexicon.
      def create_empty_lexicon
        Lexicon.new.tap do |lexicon|
          add_word_classes(lexicon)
          add_punctuation(lexicon)
        end
      end

      private

      # Register one instance of each supported word class as a terminal.
      # The registration order is significant for `Lexicon#terminals`.
      # @param aLexicon [Lexicon] the lexicon to populate.
      def add_word_classes(aLexicon)
        word_classes = [
          WClasses::CommonNoun,
          WClasses::ProperNoun,
          WClasses::IrregularVerb,
          WClasses::Adjective,
          WClasses::DefiniteArticle,
          WClasses::DemonstrativeDeterminer,
          WClasses::IndefinitePronoun
        ]
        word_classes.each { |wclass| aLexicon.add_terminal(wclass.new) }
      end

      # Register the punctuation signs as terminals.
      # @param aLexicon [Lexicon] the lexicon to populate.
      def add_punctuation(aLexicon)
        period = Rley::Syntax::Terminal.new('Period')
        aLexicon.add_terminal(period)
      end
    end # module
  end # module
end # module
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Zenlish
  module Lex
    # Pairs a word class with the lexical entry it belongs to.
    # On creation, the lexeme registers itself with that entry.
    class Lexeme
      # wclass: the word class given at construction.
      # entry: the lexical entry this lexeme was attached to.
      attr_reader :wclass, :entry

      # @param aWordClass the word class of this lexeme.
      # @param anEntry the owning entry; must respond to `add_lexeme`.
      def initialize(aWordClass, anEntry)
        @wclass, @entry = aWordClass, anEntry
        # Register only once both attributes are set.
        anEntry.add_lexeme(self)
      end

      # @return the lemma of the owning entry.
      def lemma
        entry.lemma
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Zenlish
  module Lex
    # An entry of the lexicon: a lemma together with the lexemes
    # that were attached to it.
    class LexicalEntry
      # lemma: private copy of the lemma given at construction.
      # lexemes: the lexemes attached so far, in insertion order.
      attr_reader :lemma, :lexemes

      # @param theLemma the lemma; it is duplicated, so later changes to
      #   the caller's object do not affect this entry.
      # @param aLexeme an optional first lexeme to attach.
      def initialize(theLemma, aLexeme = nil)
        @lexemes = []
        @lemma = theLemma.dup
        add_lexeme(aLexeme)
      end

      # Attach a lexeme to this entry; nil/false arguments are ignored.
      # @param aLexeme the lexeme to attach.
      # @return [Array, nil] the updated lexeme list, or nil if ignored.
      def add_lexeme(aLexeme)
        return unless aLexeme

        lexemes << aLexeme
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Zenlish
  module Lex
    # A lexicon is a collection of lexical entries.
    # Every entry is associated with one or more lexemes.
    class Lexicon
      # @return [Array] the entries, in the order they were added.
      attr_reader :entries

      # @return [Hash] maps a lemma to its entry. When several entries
      #   share the same lemma, the value is an Array of those entries.
      attr_reader :lemma2entry

      # The list of terminal symbols. Examples of terminal symbols:
      # - word classes,
      # - punctuation signs,...
      attr_reader :terminals

      # @return [Hash] maps the name of a terminal to the terminal itself.
      attr_reader :name2terminal

      def initialize
        @entries = []
        @lemma2entry = {}
        @terminals = []
        @name2terminal = {}
      end

      # Retrieve the first lexeme registered under the given lemma.
      # When several entries share that lemma, the entry that was added
      # first is used.
      # @param aLemma the lemma to look up.
      # @return the first lexeme of the entry (nil if it has no lexeme).
      # @raise [KeyError] when no entry was added for the lemma.
      def get_lexeme(aLemma)
        hit = lemma2entry.fetch(aLemma)
        # update_mapping stores an Array as soon as a lemma has homonyms;
        # calling #lexemes directly on that Array would raise NoMethodError.
        entry = hit.is_a?(Array) ? hit.first : hit
        entry.lexemes.first
      end

      # Register a terminal symbol. A terminal registered later under the
      # same name replaces the earlier one in name2terminal.
      # @param aTerminal the terminal; must respond to `name`.
      def add_terminal(aTerminal)
        terminals << aTerminal
        name2terminal[aTerminal.name] = aTerminal
      end

      # Add an entry to the lexicon and index it by its lemma.
      # @param anEntry the entry; must respond to `lemma`.
      def add_entry(anEntry)
        entries << anEntry
        lemma = anEntry.lemma

        update_mapping(lemma2entry, lemma, anEntry)
      end

      private

      # Store aValue under aKey. The first value for a key is stored as is;
      # any further value for the same key turns the slot into an Array
      # that accumulates all values in insertion order.
      def update_mapping(aHash, aKey, aValue)
        if aHash.key?(aKey)
          hit = aHash[aKey]
          if hit.is_a?(Array)
            hit << aValue
          else
            aHash[aKey] = [hit, aValue]
          end
        else
          aHash[aKey] = aValue
        end
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'rley'
|
2
|
+
|
3
|
+
module Zenlish
  module Lex
    # A Rley token that also keeps a reference to the Zenlish lexeme
    # it was created from.
    class Literal < Rley::Lexical::Token
      # The lexeme passed at construction.
      attr_reader :zlexeme

      # @param literalText the text of the token, forwarded to the parent.
      # @param aLexeme the lexeme; the name of its word class is forwarded
      #   to the parent as the terminal.
      # @param aPosition the position of the token, forwarded to the parent.
      def initialize(literalText, aLexeme, aPosition)
        terminal_name = aLexeme.wclass.name
        super(literalText, terminal_name, aPosition)
        @zlexeme = aLexeme
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Grammar for a simple subset of English language
|
2
|
+
# It is called Zenlish
|
3
|
+
|
4
|
+
require 'rley' # Load the Rley parsing library
|
5
|
+
require_relative '../lex/empty_lexicon'
|
6
|
+
|
7
|
+
########################################
|
8
|
+
# Define a grammar for a highly English-like language
|
9
|
+
grammar_builder = Rley::Syntax::GrammarBuilder.new do
  # The terminal symbols are taken from the global lexicon.
  # NOTE(review): $ZenlishLexicon is presumably set up by
  # '../lex/empty_lexicon' -- confirm.
  add_terminals(*$ZenlishLexicon.terminals)

  # Sentence level
  rule('language' => 'sentence')
  rule('sentence' => 'simple_sentence')
  rule('simple_sentence' => 'declarative_simple_sentence')
  rule('declarative_simple_sentence' => 'noun_phrase verb_phrase Period')

  # Noun phrase
  rule('noun_phrase' => 'noun')
  rule('noun' => 'ProperNoun')
  rule('noun' => 'CommonNoun')

  # Verb phrase and its complement
  rule('verb_phrase' => 'lexical_verb complement')
  rule('lexical_verb' => 'IrregularVerb')
  rule('complement' => 'ProperNoun')
  rule('complement' => 'IndefinitePronoun')
  rule('complement' => 'DemonstrativeDeterminer noun')
  rule('complement' => 'DefiniteArticle Adjective CommonNoun')
end

# And now build the grammar...
ZenlishGrammar = grammar_builder.grammar
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'zenlish_grammar'
|
4
|
+
|
5
|
+
module Zenlish
  # Parser for Zenlish: a thin wrapper around a Rley engine that is
  # loaded with the Zenlish grammar.
  class ZParser
    # The Rley facade object that does the parsing.
    attr_reader :engine

    # Create the Rley engine and load the Zenlish grammar into it.
    def initialize
      @engine = Rley::Engine.new
      @engine.use_grammar(ZenlishGrammar)
    end

    # Parse the given sequence of tokens.
    # @param tokenSeq the tokens handed over to the engine.
    # @return the parse tree built by the engine for a successful parse.
    # @raise [StandardError] when the parse failed; the message contains
    #   the failure reason reported by the engine.
    def parse(tokenSeq)
      outcome = engine.parse(tokenSeq)
      return engine.to_ptree(outcome) if outcome.success?

      # Stop if the parse failed...
      raise StandardError, "Parsing failed\nReason: #{outcome.failure_reason.message}"
    end
  end # class
end # module
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Load the WordClass class hierarchy
|
2
|
+
# Algorithm: load the leaf classes from hierarchy
|
3
|
+
|
4
|
+
require_relative 'adjective'
|
5
|
+
require_relative 'common_noun'
|
6
|
+
require_relative 'pronoun'
|
7
|
+
require_relative 'proper_noun'
|
8
|
+
require_relative 'irregular_verb'
|
9
|
+
require_relative 'definite_article'
|
10
|
+
require_relative 'demonstrative_determiner'
|
11
|
+
require_relative 'indefinite_pronoun'
|