RubyGems - gherkin_language - Versions diffs - 0.0.7 → 0.0.8 - Mend

gherkin_language 0.0.7 → 0.0.8

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/.rubocop.yml +6 -6
data/README.md +2 -0
data/Rakefile +1 -1
data/bin/gherkin_language +4 -1
data/features/confused_words.feature +52 -0
data/features/sentences.feature +12 -12
data/gherkin_language.gemspec +1 -1
data/lib/gherkin_language.rb +32 -161
data/lib/gherkin_language/error.rb +34 -0
data/lib/gherkin_language/language_tool_process.rb +142 -0
metadata +4 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 04aee53e8b36b777bf0889ce6b0cdcd94fdef265
-  data.tar.gz: 54aaa1ba0d8625f05aeb8ee79e09878657988957
+  metadata.gz: db5be66787bff52612008a4e2999084191cdff53
+  data.tar.gz: 294d5c8d58ea483b2bb877f90cdbe4e7a0bae614
 SHA512:
-  metadata.gz: b3186581735b4460df28c170f865f4ee3cbb4e203cba63d77e78c3d6e1d9622a777483b9e104749972847c1a7eca36f93cd89c7f46afad1fca6be17d42903f12
-  data.tar.gz: 4563bb4bdfe7815bf4573c8352160c6db8fb86d01a120c0ae1155a053f7fdd423cc5e012f0a607f9d7ac17b98019c36a123147ae51d6571c555b863ad2cc0dc1
+  metadata.gz: 65eb341ef2cf91a80d11c42f531c20a1dc994e751dd0b538f579efb14a8065ef5acdb41ff7aedf7fc903a6c528b7261a00b30b6c7e9064861c004fd52aac2790
+  data.tar.gz: e3916bf3b624a8b8564122e9d4d05f16cbd1cee536d77e0754e88669377519fcdfbb27b507698bb63ae3e37cc3fc334cdc28360623a4274ee90862c466a70cc2

data/.rubocop.yml CHANGED Viewed

@@ -7,21 +7,21 @@
 # Offense count: 5
 Metrics/AbcSize:
-  Max: 48
+  Max: 36
 # Offense count: 2
 # Configuration parameters: CountComments.
 Metrics/ClassLength:
-  Max: 171
+  Max: 172
 # Offense count: 2
 Metrics/CyclomaticComplexity:
-  Max: 11
+  Max: 7
 # Offense count: 17
 # Configuration parameters: AllowURI, URISchemes.
 Metrics/LineLength:
-  Max: 121
+  Max: 117
 # Offense count: 8
 # Configuration parameters: CountComments.
@@ -34,5 +34,5 @@ Metrics/ParameterLists:
   Max: 8
 # Offense count: 2
-Metrics/PerceivedComplexity:
-  Max: 12
+#Metrics/PerceivedComplexity:
+#  Max: 8

data/README.md CHANGED Viewed

@@ -23,6 +23,8 @@ To tag all words used, start it with `--tag`. This allows to build up a glossary
 To ignore specific rules, mention them with an `--ignore RULE`. This allows to bypass the checks.
+To check for confused words, based on ngrams, add `--ngram`. Please note, that it requires much disk space and time.
 Glossary
 --------

data/Rakefile CHANGED Viewed

@@ -18,7 +18,7 @@ task test: :cucumber
 desc 'Publishes the Gem'
 task push: :build do
-  sh 'gem push gherkin_language-0.0.7.gem'
+  sh 'gem push gherkin_language-0.0.8.gem'
 end
 desc 'Checks ruby style'

data/bin/gherkin_language CHANGED Viewed

@@ -17,9 +17,12 @@ OptionParser.new do |opts|
   opts.on('--no-cache', 'do not use cache') do |no_cache|
     options[:no_cache] = no_cache
   end
+  opts.on('--ngram', 'check based on ngrams') do |ngram|
+    options[:ngram] = ngram
+  end
 end.parse!
-language = GherkinLanguage.new(options.key? :no_cache)
+language = GherkinLanguage.new(options.key?(:no_cache), options.key?(:ngram))
 if options.key? :sentences
   ARGV.each { |file| puts language.extract_sentences language.parse file }

data/features/confused_words.feature ADDED Viewed

@@ -0,0 +1,52 @@
+Feature: Confused Words
+  As a Business Analyst
+  I want to be informed for confused words
+  so that I know when I'm using wrong words
+  Background:
+    Given a file named "confused_words.rb" with:
+      """
+      $LOAD_PATH << '../../lib'
+      require 'gherkin_language'
+      no_cache = true
+      ngrams = true
+      language = GherkinLanguage.new(no_cache, ngrams)
+      language.analyze 'test.feature'
+      exit language.report
+      """
+  Scenario: Warns for confused word
+    Given a file named "test.feature" with:
+      """
+      Feature: Test
+        Scenario: Tag
+          Given I do not now where it is
+          When execute
+          Then pass
+      """
+    When I run `ruby confused_words.rb`
+    Then it should fail with exactly:
+      """
+      [non-conformance] CONFUSION_RULE
+        Statistic suggests that 'know' (to be aware of) might be the correct word here, not 'now' (in this moment). Please check.
+        Context: Given I do not now where it is when execute then pass
+        Replacements: know
+        References: test.feature
+      """
+  Scenario: Accept non confused words
+    Given a file named "test.feature" with:
+      """
+      Feature: Test
+        Scenario: Tag
+          Given I do not know where it is
+          When execute
+          Then pass
+      """
+    When I run `ruby confused_words.rb`
+    Then it should pass with exactly:
+      """
+      """

data/features/sentences.feature CHANGED Viewed

@@ -80,18 +80,18 @@ Feature: Sentences
       """
       Feature: Test
         Scenario Outline: Test
-          When <A> <B>
-          Then <C>
+          When <A>
+          Then <B>
           Examples: Table
-            | A |
-            | 1 |
-            | 2 |
+            | A | B |
+            | 1 | 3 |
+            | 2 | 4 |
           Examples: Second Table
-            | B | C |
-            | 3 | 5 |
-            | 4 | 6 |
+            | B | A |
+            | 5 | 7 |
+            | 6 | 8 |
       """
     When I run `ruby extract_sentences.rb`
     Then it should pass with:
@@ -99,11 +99,11 @@ Feature: Sentences
       Test
       Test
       Table
+      When 1 then 3
+      When 2 then 4
       Second Table
-      When 1 3 then 5
-      When 1 4 then 6
-      When 2 3 then 5
-      When 2 4 then 6
+      When 7 then 5
+      When 8 then 6
       """
   Scenario: Extract Sentences considers feature description

data/gherkin_language.gemspec CHANGED Viewed

@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name        = 'gherkin_language'
-  s.version     = '0.0.7'
+  s.version     = '0.0.8'
   s.date        = '2015-10-04'
   s.summary     = 'Gherkin Language'
   s.description = 'Check language of Gherkin Files'

data/lib/gherkin_language.rb CHANGED Viewed

@@ -1,6 +1,8 @@
 # encoding: utf-8
 require 'gherkin/formatter/json_formatter'
 require 'gherkin/parser/parser'
+require 'gherkin_language/error'
+require 'gherkin_language/language_tool_process'
 require 'rexml/document'
 require 'stringio'
 require 'multi_json'
@@ -14,143 +16,7 @@ require 'digest'
 # gherkin utilities
 class GherkinLanguage
-  # This service class provides access to language tool process.
-  class LanguageToolProcess
-    attr_accessor :errors, :unknown_words
-    VERSION = 'LanguageTool-3.0'
-    URL = "https://www.languagetool.org/download/#{VERSION}.zip"
-    # This value entity class represents a language error
-    class Error
-      attr_accessor :category, :context, :issuetype, :message, :replacements, :rule, :from_y, :to_y
-      def initialize(category, context, issuetype, message, replacements, rule, from_y, to_y)
-        @category = category
-        @context = context
-        @issuetype = issuetype
-        @message = message
-        @replacements = replacements
-        @rule = rule
-        @from_y = from_y
-        @to_y = to_y
-      end
-      def str(references)
-        (red("[#{@issuetype}] #{@rule}\n") +
-         "  #{@message}\n  Context: #{@context}\n  Replacements: #{@replacements}\n  References: #{references * ', '}\n")
-      end
-    end
-    def initialize
-      path = Dir.tmpdir
-      download path unless File.exist? "#{path}/#{VERSION}/languagetool-commandline.jar"
-      @path = path
-      @p = nil
-      @reference_line = 0
-      @errors = []
-      @unknown_words = []
-      use_user_glossary "#{path}/#{VERSION}" if File.exist? '.glossary'
-    end
-    def use_user_glossary(path)
-      resource_path = "#{path}/org/languagetool/resource/en"
-      system "cp #{resource_path}/added.txt #{resource_path}/added.copy && cp .glossary #{resource_path}/added.txt"
-      at_exit do
-        system "cp #{resource_path}/added.copy #{resource_path}/added.txt"
-      end
-    end
-    def download(path)
-      system "wget --quiet #{URL} -O /var/tmp/languagetool.zip"
-      FileUtils.mkdir_p path
-      system "unzip -qq -u /var/tmp/languagetool.zip -d #{path}"
-    end
-    def start!
-      @errors = []
-      @unknown_words = []
-      @reference_line = 0
-      Dir.chdir("#{@path}/#{VERSION}/") do
-        @p = IO.popen('java -jar languagetool-commandline.jar --list-unknown --api --language en-US -', 'r+')
-      end
-    end
-    def tag(sentences)
-      output = ''
-      Dir.chdir("#{@path}/#{VERSION}/") do
-        p = IO.popen('java -jar languagetool-commandline.jar --taggeronly --api --language en-US -', 'r+')
-        sentences.each { |sentence| p.write sentence }
-        p.close_write
-        line = p.readline
-        loop do
-          break if line == "<!--\n"
-          output << line
-          line = p.readline
-        end
-        p.close
-      end
-      output.gsub!(' ', "\n")
-      output.gsub!(']', "]\n")
-      output.gsub!("\n\n", "\n")
-      output
-    end
-    def check_paragraph(paragraph)
-      start_line = @reference_line
-      send paragraph
-      end_line = @reference_line
-      send "\n\n"
-      Range.new(start_line, end_line)
-    end
-    def send(sentence)
-      @reference_line += sentence.count "\n"
-      @p.write sentence
-    end
-    def parse_errors(result)
-      doc = REXML::Document.new result
-      errors = []
-      doc.elements.each '//error' do |error|
-        errors.push Error.new(
-          error.attributes['category'],
-          error.attributes['context'].strip,
-          error.attributes['locqualityissuetype'],
-          error.attributes['msg'],
-          error.attributes['replacements'],
-          error.attributes['ruleId'],
-          error.attributes['fromy'].to_i,
-          error.attributes['toy'].to_i)
-      end
-      errors
-    end
-    def parse_unknown_words(result)
-      doc = REXML::Document.new result
-      errors = []
-      doc.elements.each '//unknown_words/word' do |error|
-        errors.push error.text
-      end
-      errors
-    end
-    def stop!
-      @p.close_write
-      errors = ''
-      line = @p.readline
-      loop do
-        break if line == "<!--\n"
-        errors << line
-        line = @p.readline
-      end
-      @errors = parse_errors errors
-      @unknown_words = parse_unknown_words errors
-      @p.close
-    end
-  end
-  def initialize(no_cache = false)
+  def initialize(no_cache = false, ngram = false)
     path = "~/.gherkin_language/#{LanguageToolProcess::VERSION}/accepted_paragraphs.yml"
     @settings_path = File.expand_path path
     @accepted_paragraphs = {}
@@ -162,6 +28,7 @@ class GherkinLanguage
     @references = {}
     @line_to_reference = {}
     @exceptions = []
+    @ngram = ngram
   end
   def ignore(exception)
@@ -219,7 +86,7 @@ class GherkinLanguage
   def report
     return 0 if @references.keys.empty?
-    language = LanguageToolProcess.new
+    language = LanguageToolProcess.new @ngram
     language.start!
     @references.keys.each do |sentence|
@@ -294,43 +161,47 @@ class GherkinLanguage
     background = []
     input.each do |features|
       next unless features.key? 'elements'
-      elements = features['elements']
-      elements.each do |scenario|
+      features['elements'].each do |scenario|
         next unless scenario.key? 'steps'
         terms = background.dup
         if scenario['type'] == 'background'
-          scenario['steps'].each do |step|
-            new_terms = [step['keyword'], step['name']].join
-            new_terms = uncapitalize(new_terms) unless terms.empty?
-            background.push new_terms
-          end
+          background.push extract_terms_from_scenario(scenario['steps'], terms)
           next
         end
-        scenario['steps'].each do |step|
-          keyword = step['keyword']
-          keyword = 'and ' unless background.empty? || keyword != 'Given '
-          new_terms = [keyword, step['name']].join
-          new_terms = uncapitalize(new_terms) unless terms.empty?
-          terms.push new_terms
-        end
-        sentence = terms.join ' '
+        terms.push extract_terms_from_scenario(scenario['steps'], background)
+        sentence = terms.join(' ').strip
         if scenario.key? 'examples'
-          prototypes = [sentence.strip]
-          scenario['examples'].each do |example|
-            sentences.push example['name'] unless example['name'].empty?
-            sentences.push example['description'] unless example['description'].empty?
-            prototypes = prototypes.map { |prototype| expand_outlines(prototype, example) }.flatten
-          end
-          sentences += prototypes
+          sentences += extract_examples(scenario['examples'], sentence)
         else
-          sentences.push sentence.strip
+          sentences.push sentence
         end
       end
     end
     sentences
   end
+  def extract_terms_from_scenario(steps, background)
+    steps.map do |step|
+      keyword = step['keyword']
+      keyword = 'and ' unless background.empty? || keyword != 'Given '
+      terms = [keyword, step['name']].join
+      terms = uncapitalize(terms) unless background.empty?
+      background = terms
+      terms
+    end.flatten
+  end
+  def extract_examples(examples, prototype)
+    examples.map do |example|
+      sentences = []
+      sentences.push example['name'] unless example['name'].empty?
+      sentences.push example['description'] unless example['description'].empty?
+      sentences += expand_outlines(prototype, example)
+      sentences
+    end.flatten
+  end
   def uncapitalize(term)
     term[0, 1].downcase + term[1..-1]
   end

data/lib/gherkin_language/error.rb ADDED Viewed

@@ -0,0 +1,34 @@
+# encoding: utf-8
+require 'gherkin/formatter/json_formatter'
+require 'gherkin/parser/parser'
+require 'rexml/document'
+require 'stringio'
+require 'multi_json'
+require 'term/ansicolor'
+include Term::ANSIColor
+require 'tmpdir'
+require 'fileutils'
+require 'yaml'
+require 'set'
+require 'digest'
+# This value entity class represents a language error
+class Error
+  attr_accessor :category, :context, :issuetype, :message, :replacements, :rule, :from_y, :to_y
+  def initialize(category, context, issuetype, message, replacements, rule, from_y, to_y)
+    @category = category
+    @context = context
+    @issuetype = issuetype
+    @message = message
+    @replacements = replacements
+    @rule = rule
+    @from_y = from_y
+    @to_y = to_y
+  end
+  def str(references)
+    (red("[#{@issuetype}] #{@rule}\n") +
+     "  #{@message}\n  Context: #{@context}\n  Replacements: #{@replacements}\n  References: #{references * ', '}\n")
+  end
+end

data/lib/gherkin_language/language_tool_process.rb ADDED Viewed

@@ -0,0 +1,142 @@
+# encoding: utf-8
+require 'gherkin/formatter/json_formatter'
+require 'gherkin/parser/parser'
+require 'rexml/document'
+require 'stringio'
+require 'multi_json'
+require 'term/ansicolor'
+include Term::ANSIColor
+require 'tmpdir'
+require 'fileutils'
+require 'yaml'
+require 'set'
+require 'digest'
+# This service class provides access to language tool process.
+class LanguageToolProcess
+  attr_accessor :errors, :unknown_words
+  VERSION = 'LanguageTool-3.1'
+  URL = "https://www.languagetool.org/download/#{VERSION}.zip"
+  NGRAM_VERSION = 'ngrams-en-20150817'
+  NGRAM_URL = "https://languagetool.org/download/ngram-data/#{NGRAM_VERSION}.zip"
+  def initialize(ngrams = false)
+    path = Dir.tmpdir
+    download(path, URL) unless File.exist? "#{path}/#{VERSION}/languagetool-commandline.jar"
+    if ngrams
+      @ngrams_path = "#{path}/#{NGRAM_VERSION}"
+      download("#{@ngrams_path}/en", NGRAM_URL) unless File.exist? @ngrams_path
+    end
+    @path = path
+    @p = nil
+    @reference_line = 0
+    @errors = []
+    @unknown_words = []
+    @ngrams = ngrams
+    use_user_glossary "#{path}/#{VERSION}" if File.exist? '.glossary'
+  end
+  def use_user_glossary(path)
+    resource_path = "#{path}/org/languagetool/resource/en"
+    system "cp #{resource_path}/added.txt #{resource_path}/added.copy && cp .glossary #{resource_path}/added.txt"
+    at_exit do
+      system "cp #{resource_path}/added.copy #{resource_path}/added.txt"
+    end
+  end
+  def download(path, url)
+    system "wget --quiet #{url} -O /var/tmp/languagetool.zip"
+    FileUtils.mkdir_p path
+    system "unzip -qq -u /var/tmp/languagetool.zip -d #{path}"
+    system 'rm /var/tmp/languagetool.zip'
+  end
+  def start!
+    @errors = []
+    @unknown_words = []
+    @reference_line = 0
+    Dir.chdir("#{@path}/#{VERSION}/") do
+      command = 'java -jar languagetool-commandline.jar --list-unknown --api --language en-US'
+      command += " --languagemodel #{@ngrams_path}" if @ngrams
+      @p = IO.popen("#{command} -", 'r+')
+    end
+  end
+  def tag(sentences)
+    output = ''
+    Dir.chdir("#{@path}/#{VERSION}/") do
+      p = IO.popen('java -jar languagetool-commandline.jar --taggeronly --api --language en-US -', 'r+')
+      sentences.each { |sentence| p.write sentence }
+      p.close_write
+      line = p.readline
+      loop do
+        break if line == "<!--\n"
+        output << line
+        line = p.readline
+      end
+      p.close
+    end
+    output.gsub!(' ', "\n")
+    output.gsub!(']', "]\n")
+    output.gsub!("\n\n", "\n")
+    output
+  end
+  def check_paragraph(paragraph)
+    start_line = @reference_line
+    send paragraph
+    end_line = @reference_line
+    send "\n\n"
+    Range.new(start_line, end_line)
+  end
+  def send(sentence)
+    @reference_line += sentence.count "\n"
+    @p.write sentence
+  end
+  def parse_errors(result)
+    doc = REXML::Document.new result
+    errors = []
+    doc.elements.each '//error' do |error|
+      errors.push decode_error error
+    end
+    errors
+  end
+  def decode_error(error)
+    Error.new(
+      error.attributes['category'],
+      error.attributes['context'].strip,
+      error.attributes['locqualityissuetype'],
+      error.attributes['msg'],
+      error.attributes['replacements'],
+      error.attributes['ruleId'],
+      error.attributes['fromy'].to_i,
+      error.attributes['toy'].to_i)
+  end
+  def parse_unknown_words(result)
+    doc = REXML::Document.new result
+    errors = []
+    doc.elements.each '//unknown_words/word' do |error|
+      errors.push error.text
+    end
+    errors
+  end
+  def stop!
+    @p.close_write
+    errors = ''
+    line = @p.readline
+    loop do
+      break if line == "<!--\n"
+      errors << line
+      line = @p.readline
+    end
+    @errors = parse_errors errors
+    @unknown_words = parse_unknown_words errors
+    @p.close
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: gherkin_language
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.8
 platform: ruby
 authors:
 - Stefan Rohe
@@ -67,6 +67,7 @@ files:
 - README.md
 - Rakefile
 - bin/gherkin_language
+- features/confused_words.feature
 - features/correct.feature
 - features/exception.feature
 - features/glossary.feature
@@ -76,6 +77,8 @@ files:
 - features/tag.feature
 - gherkin_language.gemspec
 - lib/gherkin_language.rb
+- lib/gherkin_language/error.rb
+- lib/gherkin_language/language_tool_process.rb
 - test/test_gherkin_language.rb
 - test/test_gherkin_language_tool.rb
 homepage: http://github.com/funkwerk/gherkin_language/