zenlish 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.yardopts +6 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +95 -0
  7. data/Rakefile +6 -0
  8. data/lib/zenlish.rb +9 -0
  9. data/lib/zenlish/lex/empty_lexicon.rb +7 -0
  10. data/lib/zenlish/lex/empty_lexicon_factory.rb +32 -0
  11. data/lib/zenlish/lex/lexeme.rb +19 -0
  12. data/lib/zenlish/lex/lexical_entry.rb +19 -0
  13. data/lib/zenlish/lex/lexicon.rb +55 -0
  14. data/lib/zenlish/lex/literal.rb +16 -0
  15. data/lib/zenlish/parser/zenlish_grammar.rb +29 -0
  16. data/lib/zenlish/parser/zparser.rb +30 -0
  17. data/lib/zenlish/version.rb +3 -0
  18. data/lib/zenlish/wclasses/adjective.rb +9 -0
  19. data/lib/zenlish/wclasses/all_word_classes.rb +11 -0
  20. data/lib/zenlish/wclasses/article.rb +9 -0
  21. data/lib/zenlish/wclasses/common_noun.rb +9 -0
  22. data/lib/zenlish/wclasses/definite_article.rb +9 -0
  23. data/lib/zenlish/wclasses/demonstrative_determiner.rb +9 -0
  24. data/lib/zenlish/wclasses/determiner.rb +9 -0
  25. data/lib/zenlish/wclasses/indefinite_pronoun.rb +9 -0
  26. data/lib/zenlish/wclasses/irregular_verb.rb +9 -0
  27. data/lib/zenlish/wclasses/lexical_verb.rb +9 -0
  28. data/lib/zenlish/wclasses/noun.rb +11 -0
  29. data/lib/zenlish/wclasses/pronoun.rb +9 -0
  30. data/lib/zenlish/wclasses/proper_noun.rb +10 -0
  31. data/lib/zenlish/wclasses/test_hierarchy.rb +3 -0
  32. data/lib/zenlish/wclasses/verb.rb +9 -0
  33. data/lib/zenlish/wclasses/word_class.rb +20 -0
  34. data/spec/spec_helper.rb +12 -0
  35. data/spec/zenlish/lex/empty_lexicon_factory_spec.rb +35 -0
  36. data/spec/zenlish/lex/lexeme_spec.rb +39 -0
  37. data/spec/zenlish/lex/lexical_entry_spec.rb +46 -0
  38. data/spec/zenlish/lex/lexicon_spec.rb +104 -0
  39. data/spec/zenlish/lex/literal_spec.rb +41 -0
  40. data/spec/zenlish/parser/zenlish_grammar_spec.rb +21 -0
  41. data/spec/zenlish/parser/zparser_spec.rb +86 -0
  42. data/spec/zenlish/support/minimal_lexicon.rb +30 -0
  43. data/spec/zenlish/wclasses/common_noun_spec.rb +22 -0
  44. data/spec/zenlish/wclasses/irregular_verb_spec.rb +21 -0
  45. data/spec/zenlish/wclasses/proper_noun_spec.rb +21 -0
  46. data/spec/zenlish_spec.rb +5 -0
  47. data/zenlish.gemspec +61 -0
  48. metadata +158 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 026405b993a2736b1e5c5341308aa1ffab8e77ba07dfc38d2e2344e183c466a7
4
+ data.tar.gz: 795d56d6e9b9c5204678912833c5817d04fef64e56e2128ac6ad6962b349b676
5
+ SHA512:
6
+ metadata.gz: 1bc6680bbd7f6da39d795903c4ec9b67061bfbbc409289669f37a3fa97d50f176bc634f0b6c10a6f2de54b81e5595d38fbddad0dffb9a97b555c2bd83eb68d86
7
+ data.tar.gz: a3772e8100491357a0776a4a83cf039ca17d7ba01792284ad4e92d96c4666b2394815199e9681853f4b796cea6f533cab81da65b6b6756b79a5559183f22d2db
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,6 @@
1
+ --exclude examples --exclude features --exclude spec
2
+ --no-private
3
+ --markup markdown
4
+ -
5
+ Changelog.md
6
+ License.txt
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in zenlish.gemspec
6
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 TODO: Write your name
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,95 @@
1
+ # Zenlish
2
+
3
+ ### What is __Zenlish__ ?
4
+
5
+ Zenlish = Zen + English
6
+
7
+
8
+ The goal of this project is to implement a toolkit for a subset of the English language, called ... Zenlish.
9
+ A [Controlled Natural Language](https://en.wikipedia.org/wiki/Controlled_natural_language) defines a subset of a natural language -English, for instance- with a restricted syntax and restricted semantics.
10
+
11
+ ### Design of Zenlish language
12
+ #### Minimalism
13
+ The name of the language is a combination of 'Zen' and 'English'.
14
+ It reflects a desire to make Zenlish a simple language:
15
+ - The focus is put on a simplified syntax,
16
+ - A limited lexicon. Priority on most commonly used words.
17
+
18
+ #### Expressiveness
19
+ Zenlish should be rich enough to express ideas and facts in a fluid way (vs. a contrived, artificial way). Litmus test: a Zenlish text should be easy to read for an English-speaking person.
20
+
21
+ ### Zenlish as a library (gem)
22
+ Over time, the zenlish gem will contain:
23
+ - A tokenizer (tagging, lemmatizer)
24
+ - A lexicon [STARTED]
25
+ - A context-free grammar [STARTED]
26
+ - A parser [STARTED]
27
+ - Feature unification (for number, gender agreement)
28
+ - A simplified ontology
29
+
30
+ ### What is the purpose of __Zenlish__ ?
31
+ With __Zenlish__ it should be possible for a Ruby application to interact with
32
+ users with a language that is close enough to English.
33
+
34
+ ### Roadmap
35
+ The project is still in inception.
36
+ Here is a tentative roadmap:
37
+
38
+ #### A) Support vocabulary and sentences from [Learn These Words First](http://learnthesewordsfirst.com/)
39
+ This website advocates the idea of a multi-layered dictionary.
40
+ At the core, there are about 300 essential words.
41
+ The choice of these words is inspired by the semantic primitives of [NSM
42
+ (Natural Semantic Metalanguage)](https://en.wikipedia.org/wiki/Natural_semantic_metalanguage).
43
+ The essential words are introduced in twelve lessons. Each lesson puts the words
44
+ in exemplar sentences and pictures.
45
+
46
+ The project sub-goals are:
47
+ - To inject the 300 core words into Zenlish lexicon,
48
+ - Zenlish should be able to parse all the example sentences
49
+ - Also Zenlish should determine the semantics (i.e. meaning) of the sentences
50
+
51
+ #### B) Capability to read a complete book
52
+ A good candidate book is "The Edge of the Sky" by Roberto Trotta (ISBN 978-0-465-04471-9 : hardcover, ISBN 978-0-465-04490-0 : ebook).
53
+ Professor Trotta challenged himself by writing a book on Cosmology with the 1000 most used words. More details [here](http://robertotrotta.com/the-edge-of-the-sky/).
54
+
55
+ In order to achieve this goal, Zenlish should:
56
+ - Incorporate the 1000 words in its lexicon
57
+ - Have a grammar that allows the parsing of the sentences in the book.
58
+
59
+ #### C) Capability to interpret the meaning of a complete book
60
+ Probably far-fetched. But it would be nice to send queries to Zenlish to check whether
61
+ it has some understanding of the text it reads (i.e. has a semantic representation).
62
+
63
+
64
+
65
+ ## Installation
66
+
67
+ Add this line to your application's Gemfile:
68
+
69
+ ```ruby
70
+ gem 'zenlish'
71
+ ```
72
+
73
+ And then execute:
74
+
75
+ $ bundle
76
+
77
+ Or install it yourself as:
78
+
79
+ $ gem install zenlish
80
+
81
+ ## Usage
82
+
83
+ TODO: Write usage instructions here
84
+
85
+ ## Contributing
86
+
87
+ Bug reports and pull requests are welcome on GitHub at https://github.com/famished-tiger/Zenlish. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
88
+
89
+ ## License
90
+
91
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
92
+
93
+ ## Code of Conduct
94
+
95
+ Everyone interacting in the Zenlish project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/famished-tiger/Zenlish/blob/master/CODE_OF_CONDUCT.md).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec) # defines the `spec` Rake task
5
+
6
+ task :default => :spec # the default task depends on `spec`
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file acts as a jumping-off point for loading dependencies expected
4
+ # for a Zenlish client.
5
+
6
+ require_relative './zenlish/version'
7
+ require_relative './zenlish/wclasses/all_word_classes'
8
+
9
+ # End of file
@@ -0,0 +1,7 @@
1
+ unless defined?($ZenlishLexicon) # build the global lexicon only if it is not set yet
2
+ require_relative 'empty_lexicon_factory'
3
+
4
+ sandbox = Object.new # throw-away object that receives the factory methods
5
+ sandbox.extend(Zenlish::Lex::EmptyLexiconFactory)
6
+ $ZenlishLexicon = sandbox.create_empty_lexicon
7
+ end
@@ -0,0 +1,32 @@
1
+ require_relative '../wclasses/all_word_classes'
2
+ require_relative 'lexicon'
3
+
4
+ module Zenlish
5
+ module Lex
6
+ module EmptyLexiconFactory # mix-in that builds a lexicon holding terminals but no entries
7
+ def create_empty_lexicon() # @return [Lexicon] new lexicon with word classes and punctuation registered
8
+ lexicon = Lexicon.new
9
+
10
+ add_word_classes(lexicon)
11
+ add_punctuation(lexicon)
12
+ lexicon
13
+ end
14
+
15
+ private
16
+ # Registers one terminal per word class with aLexicon.
17
+ def add_word_classes(aLexicon)
18
+ aLexicon.add_terminal(WClasses::CommonNoun.new)
19
+ aLexicon.add_terminal(WClasses::ProperNoun.new)
20
+ aLexicon.add_terminal(WClasses::IrregularVerb.new)
21
+ aLexicon.add_terminal(WClasses::Adjective.new)
22
+ aLexicon.add_terminal(WClasses::DefiniteArticle.new)
23
+ aLexicon.add_terminal(WClasses::DemonstrativeDeterminer.new)
24
+ aLexicon.add_terminal(WClasses::IndefinitePronoun.new)
25
+ end
26
+ # Registers the 'Period' punctuation terminal with aLexicon.
27
+ def add_punctuation(aLexicon)
28
+ aLexicon.add_terminal(Rley::Syntax::Terminal.new('Period'))
29
+ end
30
+ end # module
31
+ end # module
32
+ end # module
@@ -0,0 +1,19 @@
1
+ module Zenlish
2
+ module Lex
3
+ # Associates a word class with a lexical entry; a new lexeme adds itself to its entry.
4
+ class Lexeme
5
+ attr_reader :wclass
6
+ attr_reader :entry
7
+ # aWordClass is kept as wclass; anEntry is kept as entry and must respond to add_lexeme.
8
+ def initialize(aWordClass, anEntry)
9
+ @wclass = aWordClass
10
+ @entry = anEntry
11
+ @entry.add_lexeme(self)
12
+ end
13
+ # @return the lemma of the entry this lexeme belongs to
14
+ def lemma
15
+ entry.lemma
16
+ end
17
+ end # class
18
+ end # module
19
+ end # module
@@ -0,0 +1,19 @@
1
+ module Zenlish
2
+ module Lex
3
+ # A lemma plus the list of lexemes registered under it.
4
+ class LexicalEntry
5
+ attr_reader :lemma
6
+ attr_reader :lexemes
7
+ # Stores a copy (dup) of theLemma; aLexeme is optional and is added only when given.
8
+ def initialize(theLemma, aLexeme = nil)
9
+ @lemma = theLemma.dup
10
+ @lexemes = []
11
+ add_lexeme(aLexeme)
12
+ end
13
+ # Appends aLexeme to lexemes unless it is nil or false.
14
+ def add_lexeme(aLexeme)
15
+ lexemes << aLexeme if aLexeme
16
+ end
17
+ end # class
18
+ end # module
19
+ end # module
@@ -0,0 +1,55 @@
1
+ module Zenlish
2
+ module Lex
3
+ # A lexicon is a collection of lexical entries.
4
+ # Every entry is associated with one one more lexemes.
5
+ class Lexicon
6
+ attr_reader :entries
7
+ attr_reader :lemma2entry
8
+
9
+ # The list of terminal symbols. Examples of terminal symbols:
10
+ # - word classes,
11
+ # - punctuation signs,...
12
+ attr_reader :terminals
13
+ attr_reader :name2terminal
14
+
15
+ def initialize
16
+ @entries = []
17
+ @lemma2entry = {}
18
+ @terminals = []
19
+ @name2terminal = {}
20
+ end
21
+
22
+ def get_lexeme(aLemma)
23
+ lemma2entry.fetch(aLemma).lexemes.first
24
+ end
25
+
26
+ def add_terminal(aTerminal)
27
+ terminals << aTerminal
28
+ name2terminal[aTerminal.name] = aTerminal
29
+ end
30
+
31
+ def add_entry(anEntry)
32
+ entries << anEntry
33
+ lemma = anEntry.lemma
34
+
35
+ update_mapping(lemma2entry, lemma, anEntry)
36
+ end
37
+
38
+ private
39
+
40
+ def update_mapping(aHash, aKey, aValue)
41
+ if aHash.include?(aKey)
42
+ hit = aHash[aKey]
43
+ if hit.is_a?(Array)
44
+ hit << aValue
45
+ else
46
+ aHash[aKey] = [hit, aValue]
47
+ end
48
+ else
49
+ aHash[aKey] = aValue
50
+ end
51
+ end
52
+
53
+ end # class
54
+ end # module
55
+ end # module
@@ -0,0 +1,16 @@
1
+ require 'rley'
2
+
3
+ module Zenlish
4
+ module Lex
5
+ # A Rley token that also keeps the Zenlish lexeme it was built from.
6
+ class Literal < Rley::Lexical::Token
7
+ attr_reader :zlexeme
8
+
9
+ # initialize(theLexeme, aTerminal, aPosition) ⇒ Token
10
+ def initialize(literalText, aLexeme, aPosition)
11
+ super(literalText, aLexeme.wclass.name, aPosition) # second argument is the name of the lexeme's word class
12
+ @zlexeme = aLexeme
13
+ end
14
+ end # class
15
+ end # module
16
+ end # module
@@ -0,0 +1,29 @@
1
+ # Grammar for a simple subset of English language
2
+ # It is called Zenlish
3
+
4
+ require 'rley' # Load the Rley parsing library
5
+ require_relative '../lex/empty_lexicon'
6
+
7
+ ########################################
8
+ # Define a grammar for a highly English-like language
9
+ builder = Rley::Syntax::GrammarBuilder.new do
10
+ add_terminals(*$ZenlishLexicon.terminals) # terminals come from the global lexicon set up by ../lex/empty_lexicon
11
+ # add_terminals('Period')
12
+
13
+ rule 'language' => 'sentence'
14
+ rule 'sentence' => 'simple_sentence'
15
+ rule 'simple_sentence' => 'declarative_simple_sentence'
16
+ rule 'declarative_simple_sentence' => 'noun_phrase verb_phrase Period'
17
+ rule 'noun_phrase' => 'noun'
18
+ rule 'noun' => 'ProperNoun'
19
+ rule 'noun' => 'CommonNoun'
20
+ rule 'verb_phrase' => 'lexical_verb complement'
21
+ rule 'lexical_verb' => 'IrregularVerb'
22
+ rule 'complement' => 'ProperNoun'
23
+ rule 'complement' => 'IndefinitePronoun'
24
+ rule 'complement' => 'DemonstrativeDeterminer noun'
25
+ rule 'complement' => 'DefiniteArticle Adjective CommonNoun'
26
+ end
27
+
28
+ # And now build the grammar...
29
+ ZenlishGrammar = builder.grammar
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'zenlish_grammar'
4
+
5
+ module Zenlish
6
+ class ZParser
7
+ attr_reader(:engine)
8
+ # Creates the Rley engine and loads the Zenlish grammar into it.
9
+ def initialize
10
+ # Create a Rley facade object
11
+ @engine = Rley::Engine.new
12
+
13
+ # Step 1. Load Zenlish grammar
14
+ @engine.use_grammar(ZenlishGrammar)
15
+ end
16
+ # Parses tokenSeq and returns engine.to_ptree(result); raises StandardError when the parse fails.
17
+ def parse(tokenSeq)
18
+ result = engine.parse(tokenSeq)
19
+
20
+ unless result.success?
21
+ # Stop if the parse failed...
22
+ line1 = "Parsing failed\n"
23
+ line2 = "Reason: #{result.failure_reason.message}"
24
+ raise StandardError, line1 + line2
25
+ end
26
+
27
+ return engine.to_ptree(result)
28
+ end
29
+ end # class
30
+ end # module
@@ -0,0 +1,3 @@
1
+ module Zenlish
2
+ VERSION = "0.1.0" # version number of the zenlish gem
3
+ end
@@ -0,0 +1,9 @@
1
+ require_relative 'word_class'
2
+
3
+ module Zenlish
4
+ module WClasses
5
+ # Word class for adjectives; adds no behavior of its own to WordClass.
6
+ class Adjective < WordClass
7
+ end # class
8
+ end # module
9
+ end # module
@@ -0,0 +1,11 @@
1
+ # Load the WordClass class hierarchy
2
+ # Algorithm: load the leaf classes from hierarchy
3
+
4
+ require_relative 'adjective'
5
+ require_relative 'common_noun'
6
+ require_relative 'pronoun'
7
+ require_relative 'proper_noun'
8
+ require_relative 'irregular_verb'
9
+ require_relative 'definite_article'
10
+ require_relative 'demonstrative_determiner'
11
+ require_relative 'indefinite_pronoun'
@@ -0,0 +1,9 @@
1
+ require_relative 'determiner'
2
+
3
+ module Zenlish
4
+ module WClasses
5
+ # Word class for articles, a kind of Determiner; adds no behavior of its own.
6
+ class Article < Determiner
7
+ end # class
8
+ end # module
9
+ end # module
@@ -0,0 +1,9 @@
1
+ require_relative 'noun'
2
+
3
+ module Zenlish
4
+ module WClasses
5
+ # Common nouns refer to general entities. Most have a singular and plural form.
6
+ class CommonNoun < Noun # adds no behavior of its own to Noun
7
+ end # class
8
+ end # module
9
+ end # module
@@ -0,0 +1,9 @@
1
+ require_relative 'article'
2
+
3
+ module Zenlish
4
+ module WClasses
5
+ # Word class for definite articles, a kind of Article; adds no behavior of its own.
6
+ class DefiniteArticle < Article
7
+ end # class
8
+ end # module
9
+ end # module