RubyGems - agenda - Versions diffs - 0.1.2 - Mend

agenda 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

data/.gitignore +4 -0
data/Gemfile +4 -0
data/README.rdoc +7 -0
data/Rakefile +2 -0
data/agenda.gemspec +23 -0
data/config/alternate-dictionary.yaml +1 -0
data/config/dictionary.yaml +7 -0
data/config/regexp.yaml +2 -0
data/features/dictionaries.feature +52 -0
data/features/steps/dictionaries.rb +35 -0
data/features/steps/tagged_result.rb +11 -0
data/features/steps/tagging.rb +7 -0
data/features/steps/word_count.rb +21 -0
data/features/support/env.rb +2 -0
data/features/tagged_result.feature +18 -0
data/features/tagging.feature +20 -0
data/features/word_count.feature +78 -0
data/lib/agenda.rb +40 -0
data/lib/agenda/analyzer.rb +31 -0
data/lib/agenda/version.rb +3 -0
data/lib/agenda/word.rb +31 -0
data/lib/agenda/wordarray.rb +37 -0
data/spec/agenda/analyzer_specs.rb +31 -0
data/spec/agenda/word_spec.rb +20 -0
data/spec/spec_helper.rb +1 -0
metadata +83 -0

data/.gitignore ADDED

@@ -0,0 +1,4 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*

data/Gemfile ADDED

@@ -0,0 +1,4 @@
+# Fetch gems over HTTPS so downloads cannot be read or altered in transit.
+source "https://rubygems.org"
+# Specify your gem's dependencies in agenda.gemspec
+gemspec

data/README.rdoc ADDED

@@ -0,0 +1,7 @@
+= Agenda
+A set of discourse analysis tools that give auditors and editors fast feedback on the quality and tone of a text.
+== Current status
+Agenda is currently under development; it has not been released as a gem yet.

data/Rakefile ADDED

	@@ -0,0 +1,2 @@
1	+ require 'bundler'
2	+ Bundler::GemHelper.install_tasks

data/agenda.gemspec ADDED

@@ -0,0 +1,23 @@
+# -*- encoding: utf-8 -*-
+# Gem specification for agenda: package metadata plus the lists of files,
+# test files and executables that go into the built gem.
+$:.push File.expand_path("../lib", __FILE__)
+require "agenda/version"
+Gem::Specification.new do |s|
+  s.name        = "agenda"
+  s.version     = Agenda::VERSION
+  s.platform    = Gem::Platform::RUBY
+  s.authors     = ["Xavier Via"]
+  s.email       = ["xavier.via.canel@gmail.com"]
+  s.homepage    = ""
+  s.summary     = %q{Text analysis package oriented to deconstruct discourse}
+  s.description = %q{Text analysis package oriented to deconstruct discourse}
+  s.rubyforge_project = "agenda"
+  s.add_development_dependency "rspec"
+  # Every file tracked by git is packaged.
+  s.files         = `git ls-files`.split("\n")
+  # Derive the remaining lists from s.files instead of passing a glob to git
+  # through the shell: a brace pattern such as {test,spec,features}/* is only
+  # expanded by shells that support brace expansion, so it may match nothing
+  # and silently leave the list empty.
+  s.test_files    = s.files.grep(%r{\A(test|spec|features)/})
+  s.executables   = s.files.grep(%r{\Abin/}).map { |f| File.basename(f) }
+  s.require_paths = ["lib"]
+end

data/config/alternate-dictionary.yaml ADDED

	@@ -0,0 +1 @@
1	+ freak: [elf, orc, dwarf, human]

data/config/dictionary.yaml ADDED

@@ -0,0 +1,7 @@
+proactive: [action, future]
+negative: [disgrace]
+neutral: [plane]
+common: [of, the, in, a, and]
+article: [the, a]
+verb: [was, shape]
+noun: [plane, disgrace]

data/config/regexp.yaml ADDED

	@@ -0,0 +1,2 @@
1	+ discarded: >
2	+ \\\. \/(),{}*'\"¿?¡!“”\s\[\]

data/features/dictionaries.feature ADDED

@@ -0,0 +1,52 @@
+Feature: Dictionaries
+  In order to list special words
+  As an Agenda developer
+  I want to load YAML dictionaries
+  Scenario: Discarded words regexp
+    Given the file "config/regexp.yaml"
+    And it contains:
+      """
+      discarded:
+      """
+    When is loaded
+    Then I should get a discarded chars regexp
+  Scenario: Negatively loaded words
+    Given the file "config/dictionary.yaml"
+    And it contains:
+      """
+      negative: [disgrace
+      """
+    When is loaded
+    Then I should get a negative words dictionary
+    And negative dictionary should have "disgrace"
+  Scenario: Common words
+    Given the file "config/dictionary.yaml"
+    And it contains:
+      """
+      common: [of, the
+      """
+    When is loaded
+    Then I should get a common words dictionary
+  Scenario: Load dictionary
+    Given the file "config/alternate-dictionary.yaml"
+    And it contains:
+      """
+      freak: [elf, orc
+      """
+    When I load it
+    Then I should get a freak words dictionary
+    And I should get a common words dictionary
+  Scenario: Replace dictionary
+    Given the file "config/alternate-dictionary.yaml"
+    And it contains:
+      """
+      freak: [elf, orc
+      """
+    When I replace dictionaries with it
+    Then I should get a freak words dictionary
+    And I should not get a common words dictionary

data/features/steps/dictionaries.rb ADDED

@@ -0,0 +1,35 @@
+# Step definitions backing features/dictionaries.feature.
+# Resolves the named file against the directory three levels above this
+# step file and checks that it exists.
+Given /the file "(.+?)"$/ do |relative_path|
+  project_root = File.dirname(File.dirname(File.dirname(__FILE__)))
+  @path = "#{project_root}/#{relative_path}"
+  File.should exist(@path)
+end
+# Reads the file chosen by the previous step and checks that the expected
+# snippet appears somewhere in it (a substring match, not a full comparison).
+Given /it contains:/ do |expected_snippet|
+  @the_content = File.read(@path)
+  @the_content.should include(expected_snippet)
+end
+# Intentionally empty: this step performs no action of its own.
+When /is loaded/ do
+end
+When /I load it/ do
+  Agenda.load_dictionary(@path)
+end
+When /I replace dictionaries with it/ do
+  Agenda.replace_dictionaries_with(@path)
+end
+Then /I should get a (.+?) words dictionary/ do |dictionary_name|
+  entries = Agenda.dictionary[dictionary_name.to_sym]
+  entries.should_not be_empty
+end
+Then /I should get a (.+?) chars regexp/ do |regexp_name|
+  compiled = Agenda.regexp[regexp_name]
+  compiled.should be_a(Regexp)
+end
+Then /^(.+?) dictionary should have "(.+?)"$/ do |dictionary_name, expected_word|
+  entries = Agenda.dictionary[dictionary_name.to_sym]
+  entries.should include(expected_word)
+end
+Then /I should not get a (.+?) words dictionary/ do |dictionary_name|
+  Agenda.dictionary.should_not have_key(dictionary_name.to_sym)
+end

data/features/steps/tagged_result.rb ADDED

@@ -0,0 +1,11 @@
+# Step definitions backing features/tagged_result.feature. They inspect the
+# word list stored in @the_words by an earlier step.
+# The words carrying the tag must include the given word.
+Then /in :(.+?) I should get "(.+?)"$/ do |tag_name, text|
+  tagged = @the_words.tag(tag_name.to_sym)
+  tagged.should include(Agenda::Word.new(text))
+end
+# The words NOT carrying the tag must not include the given word.
+Then /in non :(.+?) I should not get "(.+?)"$/ do |tag_name, text|
+  untagged = @the_words.not_tag(tag_name.to_sym)
+  untagged.should_not include(Agenda::Word.new(text))
+end
+# The words NOT carrying the tag must include the given word.
+Then /in non :(.+?) I should get "(.+?)"$/ do |tag_name, text|
+  untagged = @the_words.not_tag(tag_name.to_sym)
+  untagged.should include(Agenda::Word.new(text))
+end

data/features/steps/tagging.rb ADDED

@@ -0,0 +1,7 @@
+# Step definitions backing features/tagging.feature.
+# Analyzes the text stored in @the_text and keeps the result for later steps.
+When /I ask for word tagging/ do
+  @the_words = @the_text.agenda
+end
+# Looks the word up in the result and checks that it carries the given tag.
+Then /I should get :(.+?) in "(.+?)"$/ do |tag_name, text|
+  found = @the_words.get(text)
+  found.should have_tag(tag_name.to_sym)
+end

data/features/steps/word_count.rb ADDED

@@ -0,0 +1,21 @@
+# Step definitions backing features/word_count.feature.
+# Text supplied as a multi-line doc string.
+Given /^the text:$/ do |doc_string|
+  @the_text = doc_string
+end
+# Text supplied inline between double quotes.
+Given /the text "(.+)"$/ do |quoted_text|
+  @the_text = quoted_text
+end
+When /^I start word counting$/ do
+  @the_words = @the_text.agenda
+end
+# Checks the number of occurrences recorded for one word.
+Then /I should get (\d+) in "([^"]+)"/ do |expected, text|
+  actual_count = @the_words.get(text).count
+  actual_count.should == expected.to_i
+end
+# Compares the whole rendered word list, one word per line; still marked
+# as pending.
+Then /^I should get:$/ do |expected_listing|
+  pending do
+    rendered = @the_words.join("\n")
+    rendered.should == expected_listing
+  end
+end

data/features/support/env.rb ADDED

	@@ -0,0 +1,2 @@
1	+ $LOAD_PATH << File.expand_path("../../../lib", __FILE__)
2	+ require "agenda"

data/features/tagged_result.feature ADDED

@@ -0,0 +1,18 @@
+Feature: Tagged result
+  In order to get an easily readable tagged result
+  As a user
+  I want to get tagged results
+  Scenario: Proactive tagging
+    Given the text "Take action and shape the future."
+    When I ask for word tagging
+    Then in :proactive I should get "action"
+    And in :verb I should get "shape"
+    And in :proactive I should get "future"
+  Scenario: Non-common words
+    Given the text "The task was easy and at hand."
+    When I ask for word tagging
+    Then in non :common I should not get "the"
+    And in non :common I should get "task"
+    And in non :common I should not get "and"

data/features/tagging.feature ADDED

@@ -0,0 +1,20 @@
+Feature: Word categorization within text
+  In order to distinguish tone of discourse
+  As a text analyst
+  I want to be able to tag words
+  Scenario: Loaded word
+    Given the text "The plane was a disgrace."
+    When I ask for word tagging
+    Then I should get 1 in "disgrace"
+    And I should get :negative in "disgrace"
+    And I should get :neutral in "plane"
+  Scenario: Type tagging
+    Given the text "The plane was a disgrace."
+    When I ask for word tagging
+    Then I should get :article in "the"
+    And I should get :verb in "was"
+    And I should get :article in "a"
+    And I should get :noun in "plane"
+    And I should get :noun in "disgrace"

data/features/word_count.feature ADDED

@@ -0,0 +1,78 @@
+Feature: Count words from a text
+  In order to detect word saturation
+  As an amateur writer
+  I want to get an automated count of my words
+  Scenario: Simple word count
+    Given the text "I want to know to be"
+    When I start word counting
+    Then I should get 1 in "want"
+    And I should get 2 in "to"
+  Scenario: Discarding dots
+    Given the text "to be to."
+    When I start word counting
+    Then I should get 2 in "to"
+  Scenario: Discarding parentheses, commas and slashes
+    Given the text "The (main) extra/thing, here, main."
+    When I start word counting
+    Then I should get 1 in "the"
+    And I should get 2 in "main"
+    And I should get 1 in "extra"
+    And I should get 1 in "thing"
+    And I should get 1 in "here"
+  Scenario: Discarding brackets, any bracket, and slashes
+    Given the text "{the building} [is neat] /truly\"
+    When I start word counting
+    Then I should get 1 in "the"
+    And I should get 1 in "building"
+    And I should get 1 in "is"
+    And I should get 1 in "neat"
+    And I should get 1 in "truly"
+  Scenario: Discarding asterisks, apostrophes and quotes
+    Given the text "*in* my' quotes""
+    When I start word counting
+    Then I should get 1 in "in"
+    And I should get 1 in "my"
+    And I should get 1 in "quotes"
+  Scenario: Discarding exclamation marks and interrogation marks
+    Given the text "¡Yes!, ¿What?"
+    When I start word counting
+    Then I should get 1 in "yes"
+    And I should get 1 in "what"
+  Scenario: Discarding special quotes
+    Given the text "“Special quotes”"
+    When I start word counting
+    Then I should get 1 in "special"
+    And I should get 1 in "quotes"
+  Scenario: Everything downcase
+    Given the text "To be to"
+    When I start word counting
+    Then I should get 2 in "to"
+    And I should get 2 in "To"
+    And I should get 2 in "tO"
+    And I should get 2 in "TO"
+  Scenario: Render in order a set of words
+    Given the text:
+      """
+        I use a given word a lot of times. A lot.
+      """
+    When I start word counting
+    Then I should get:
+      """
+      a: 3
+      lot: 2
+      i: 1
+      use: 1
+      given: 1
+      word: 1
+      of: 1
+      times: 1
+      """

data/lib/agenda.rb ADDED

@@ -0,0 +1,40 @@
+require "yaml"
+# Namespace of the gem. It keeps two module-wide registries, both filled from
+# the YAML files under config/ while this file is being loaded:
+# * dictionaries: tag name (Symbol) => list of words carrying that tag
+# * regexps: name (String) => Regexp built as a character class from the
+#   characters listed in the YAML value
+module Agenda
+  # Returns the current dictionaries, keyed by tag name as a Symbol.
+  def self.dictionary; @@dictionary; end
+  # Returns the compiled regexps, keyed by name as a String.
+  def self.regexp; @@regexp; end
+  # Adds the dictionaries found in the YAML file at +path+ to the current
+  # ones; a dictionary whose name is already registered is overwritten.
+  # Returns the entries as read from the file.
+  def self.load_dictionary(path)
+    entries = read_dictionary_file(path)
+    # to_s first: YAML may parse a bare key such as `yes` or `1` into a
+    # non-String object that has no to_sym.
+    entries.each { |name, words| @@dictionary[name.to_s.to_sym] = words }
+  end
+  # Discards every current dictionary and keeps only those found in the YAML
+  # file at +path+. The file is read and converted before anything is
+  # discarded, so a missing or malformed file raises and leaves the current
+  # dictionaries untouched. Returns the entries as read from the file.
+  def self.replace_dictionaries_with(path)
+    entries = read_dictionary_file(path)
+    fresh = {}
+    entries.each { |name, words| fresh[name.to_s.to_sym] = words }
+    @@dictionary = fresh
+    entries
+  end
+  # Parses the YAML file at +path+ and returns its top-level mapping.
+  # An empty document counts as "no dictionaries"; any other document that
+  # is not a mapping raises ArgumentError.
+  def self.read_dictionary_file(path)
+    entries = YAML.load_file(path) || {}
+    unless entries.respond_to?(:each_pair)
+      raise ArgumentError, "#{path} must contain a mapping of dictionary names to word lists"
+    end
+    entries
+  end
+  private_class_method :read_dictionary_file
+  # Load the bundled defaults; config/ sits next to lib/ in the gem.
+  base_path = File.dirname(File.dirname(File.expand_path(__FILE__)))
+  @@base_dictionary = read_dictionary_file "#{base_path}/config/dictionary.yaml"
+  @@dictionary = {}
+  @@base_dictionary.each do |key, value|
+    @@dictionary[key.to_s.to_sym] = value
+  end
+  @@regexp = YAML.load_file "#{base_path}/config/regexp.yaml"
+  # Replace each raw character list with a Regexp matching any one of them.
+  @@regexp.each do |key, value|
+    @@regexp[key] = Regexp.new "[#{value}]"
+  end
+end
+# Gem own requires
+require "agenda/word"
+require "agenda/wordarray"
+require "agenda/analyzer"
+# Convenience entry point so that any string can be analyzed directly.
+class String
+  # Returns the result of Agenda::Analyzer#tag_words for this string.
+  def agenda
+    analyzer = Agenda::Analyzer.new(self)
+    analyzer.tag_words
+  end
+end

data/lib/agenda/analyzer.rb ADDED

@@ -0,0 +1,31 @@
+module Agenda
+  # Splits a text into words, counts how often each one occurs and tags the
+  # words using the dictionaries registered in Agenda.
+  class Analyzer
+    # the_string:: text to analyze. A copy is marked as UTF-8, so the
+    # caller's string keeps its own encoding and frozen strings are accepted.
+    def initialize(the_string)
+      @the_string = the_string.dup.force_encoding "UTF-8"
+    end
+    # Returns a WordArray with one Word per distinct (downcased) word of the
+    # text, each holding its number of occurrences, arranged by
+    # WordArray#order!. The text is split on the "discarded" regexp.
+    def word_count
+      words = WordArray.new
+      # Word text => Word already stored in +words+, so every token is found
+      # with one hash lookup instead of a scan of the whole array.
+      seen = {}
+      @the_string.split(Agenda.regexp["discarded"]).each do |token|
+        # Consecutive separators produce empty tokens; they are not words.
+        next if token.empty?
+        candidate = Word.new(token)
+        word = seen[candidate.word]
+        unless word
+          word = seen[candidate.word] = candidate
+          words << word
+        end
+        word.count += 1
+      end
+      words.order!
+      words
+    end
+    # Returns the word list of #word_count with its words tagged: a word
+    # receives a dictionary's name as a tag when that dictionary lists it.
+    def tag_words
+      the_word_array = word_count
+      Agenda.dictionary.each do |name, words|
+        the_word_array.each do |word|
+          word.tags << name if words.include?(word.word)
+        end
+      end
+      the_word_array
+    end
+  end
+end

data/lib/agenda/version.rb ADDED

@@ -0,0 +1,3 @@
+module Agenda
+  # Version of the gem; agenda.gemspec reads it to set the package version.
+  VERSION = "0.1.2"
+end

data/lib/agenda/word.rb ADDED

@@ -0,0 +1,31 @@
+module Agenda
+  # One distinct word of an analyzed text, together with its number of
+  # occurrences and the tags assigned to it. The text is stored downcased,
+  # so words that differ only in letter case are the same word.
+  class Word
+    attr_accessor :word, :count, :tags
+    # the_word:: text of the word; it is stored downcased.
+    # The count starts at 0 and the tag list starts empty.
+    def initialize(the_word)
+      @word = the_word.downcase
+      @count = 0
+      @tags = []
+    end
+    # Renders "word: count", followed by the tags in parentheses when the
+    # word has any, e.g. "plane: 1 (neutral, noun)".
+    def to_s
+      s = "#{@word}: #{@count}"
+      s += " (" + @tags.join(", ") + ")" unless @tags.empty?
+      s
+    end
+    # True when +tag+ has been assigned to this word.
+    def has_tag?(tag)
+      @tags.include? tag
+    end
+    # Strict equality: true only for another Word with the same text.
+    # Count and tags are ignored. Anything that is not a Word is never eql?.
+    def eql?(other)
+      other.is_a?(Agenda::Word) && other.word == @word
+    end
+    # Hash code consistent with #eql?, so equal words collapse in Array#uniq
+    # and address the same Hash entry.
+    def hash
+      @word.hash
+    end
+    # Loose equality: a Word equals another Word with the same text, and
+    # also equals anything that itself compares equal to the text (in
+    # practice, a String holding the same downcased word).
+    def ==(other)
+      if other.is_a? Agenda::Word
+        other.word == @word
+      else
+        other == @word
+      end
+    end
+  end
+end

data/lib/agenda/wordarray.rb ADDED

@@ -0,0 +1,37 @@
+module Agenda
+  # Array of Word objects with lookups by word text and by tag.
+  class WordArray < Array
+    # Reorders the array in place, most frequent word first. Words with the
+    # same count keep their current relative order (the index is used as a
+    # tie-breaker because sort_by alone is not stable). Returns self.
+    def order!
+      ranked = each_with_index.sort_by { |one_word, position| [-one_word.count, position] }
+      replace(ranked.map { |one_word, _position| one_word })
+    end
+    # True when a word with this text is present; letter case is ignored.
+    def has?(string)
+      get(string) ? true : false
+    end
+    # Returns the Word whose text equals +string+ downcased, or false when
+    # there is none.
+    def get(string)
+      wanted = string.downcase
+      find { |one_word| one_word.word == wanted } || false
+    end
+    # Returns a new WordArray with the words that carry +the_tag+.
+    def tag(the_tag)
+      WordArray.new(select { |one_word| one_word.has_tag? the_tag })
+    end
+    # Returns a new WordArray with the words that do not carry +the_tag+.
+    def not_tag(the_tag)
+      WordArray.new(reject { |one_word| one_word.has_tag? the_tag })
+    end
+  end
+end

data/spec/agenda/analyzer_specs.rb ADDED

@@ -0,0 +1,31 @@
+require "spec_helper"
+# Specs for building an Agenda::Analyzer and for its word counting.
+describe Agenda::Analyzer do
+  describe "#new" do
+    it "should fail on no string argument" do
+      construction = lambda { Agenda::Analyzer.new }
+      construction.should raise_error(ArgumentError)
+    end
+  end
+  describe "#word_count" do
+    context "the analyzer has a non-empty string" do
+      before(:each) do
+        @the_analyzer = Agenda::Analyzer.new("My non empty string")
+      end
+      it "should return a WordArray" do
+        counted = @the_analyzer.word_count
+        counted.should be_a(Agenda::WordArray)
+      end
+    end
+    context "the analyzer has an empty string" do
+      before(:each) do
+        @the_analyzer = Agenda::Analyzer.new("")
+      end
+      it "should return an empty word array" do
+        counted = @the_analyzer.word_count
+        counted.should be_empty
+      end
+    end
+  end
+end

data/spec/agenda/word_spec.rb ADDED

@@ -0,0 +1,20 @@
+require "spec_helper"
+# Specs for Agenda::Word: tag lookup and the two equality methods.
+describe Agenda::Word do
+  context "#has_tag?" do
+    it "should return true when tags includes it" do
+      the_word = Agenda::Word.new "man"
+      the_word.tags << :common
+      the_word.should have_tag(:common)
+    end
+  end
+  context "#eql?" do
+    it "should be true on same word with different count" do
+      the_word = Agenda::Word.new "man"
+      the_word.count = 3
+      other_word = Agenda::Word.new "man"
+      other_word.count = 1
+      # The eql matcher calls #eql?; `should ==` would only exercise #==.
+      the_word.should eql(other_word)
+    end
+  end
+  context "#==" do
+    it "should be true on same word with different count" do
+      the_word = Agenda::Word.new "man"
+      the_word.count = 3
+      other_word = Agenda::Word.new "man"
+      other_word.count = 1
+      the_word.should == other_word
+    end
+    it "should be true on a string holding the same text" do
+      the_word = Agenda::Word.new "man"
+      the_word.should == "man"
+    end
+  end
+end

data/spec/spec_helper.rb ADDED

	@@ -0,0 +1 @@
1	+ require "agenda"

metadata ADDED

@@ -0,0 +1,83 @@
+--- !ruby/object:Gem::Specification
+name: agenda
+version: !ruby/object:Gem::Version
+  version: 0.1.2
+  prerelease:
+platform: ruby
+authors:
+- Xavier Via
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-08-08 00:00:00.000000000 -03:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: &21654444 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *21654444
+description: Text analysis package oriented to deconstruct discourse
+email:
+- xavier.via.canel@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- README.rdoc
+- Rakefile
+- agenda.gemspec
+- config/alternate-dictionary.yaml
+- config/dictionary.yaml
+- config/regexp.yaml
+- features/dictionaries.feature
+- features/steps/dictionaries.rb
+- features/steps/tagged_result.rb
+- features/steps/tagging.rb
+- features/steps/word_count.rb
+- features/support/env.rb
+- features/tagged_result.feature
+- features/tagging.feature
+- features/word_count.feature
+- lib/agenda.rb
+- lib/agenda/analyzer.rb
+- lib/agenda/version.rb
+- lib/agenda/word.rb
+- lib/agenda/wordarray.rb
+- spec/agenda/analyzer_specs.rb
+- spec/agenda/word_spec.rb
+- spec/spec_helper.rb
+has_rdoc: true
+homepage: ''
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project: agenda
+rubygems_version: 1.5.2
+signing_key:
+specification_version: 3
+summary: Text analysis package oriented to deconstruct discourse
+test_files: []