agenda 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
# Gems are fetched over HTTPS so that downloads cannot be read or altered
# in transit (the plain-HTTP endpoint offers no such protection).
source "https://rubygems.org"

# Specify your gem's dependencies in agenda.gemspec
gemspec
@@ -0,0 +1,7 @@
1
+ = Agenda
2
+
3
+ A set of discourse analysis tools aimed at giving auditors and editors fast feedback on writing quality and tone.
4
+
5
+ == Current status
6
+
7
+ Agenda is currently under development; it has not been released as a gem yet.
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,23 @@
# -*- encoding: utf-8 -*-
# Gem specification for agenda. The version is read from lib/agenda/version.rb,
# so lib/ is put on the load path before requiring it.
$:.push File.expand_path("../lib", __FILE__)
require "agenda/version"

Gem::Specification.new do |s|
  s.name        = "agenda"
  s.version     = Agenda::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Xavier Via"]
  s.email       = ["xavier.via.canel@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Text analysis package oriented to deconstruct discourse}
  s.description = %q{Text analysis package oriented to deconstruct discourse}

  s.rubyforge_project = "agenda"

  s.add_development_dependency "rspec"

  # Packaged files are whatever git tracks.
  s.files         = `git ls-files`.split("\n")
  # Directory pathspecs are passed explicitly instead of "{test,spec,features}/*":
  # backticks run through /bin/sh, which on many systems performs no brace
  # expansion, so the braced pattern matched nothing and test_files came out empty.
  s.test_files    = `git ls-files -- test spec features`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1 @@
1
+ freak: [elf, orc, dwarf, human]
@@ -0,0 +1,7 @@
1
+ proactive: [action, future]
2
+ negative: [disgrace]
3
+ neutral: [plane]
4
+ common: [of, the, in, a, and]
5
+ article: [the, a]
6
+ verb: [was, shape]
7
+ noun: [plane, disgrace]
@@ -0,0 +1,2 @@
1
+ discarded: >
2
+ \\\. \/(),{}*'\"¿?¡!“”\s\[\]
@@ -0,0 +1,52 @@
1
+ Feature: Dictionaries
2
+ In order to list special words
3
+ As an Agenda developer
4
+ I want to load YAML dictionaries
5
+
6
+ Scenario: Discarded words regexp
7
+ Given the file "config/regexp.yaml"
8
+ And it contains:
9
+ """
10
+ discarded:
11
+ """
12
+ When is loaded
13
+ Then I should get a discarded chars regexp
14
+
15
+ Scenario: Negatively loaded words
16
+ Given the file "config/dictionary.yaml"
17
+ And it contains:
18
+ """
19
+ negative: [disgrace
20
+ """
21
+ When is loaded
22
+ Then I should get a negative words dictionary
23
+ And negative dictionary should have "disgrace"
24
+
25
+ Scenario: Common words
26
+ Given the file "config/dictionary.yaml"
27
+ And it contains:
28
+ """
29
+ common: [of, the
30
+ """
31
+ When is loaded
32
+ Then I should get a common words dictionary
33
+
34
+ Scenario: Load dictionary
35
+ Given the file "config/alternate-dictionary.yaml"
36
+ And it contains:
37
+ """
38
+ freak: [elf, orc
39
+ """
40
+ When I load it
41
+ Then I should get a freak words dictionary
42
+ And I should get a common words dictionary
43
+
44
+ Scenario: Replace dictionary
45
+ Given the file "config/alternate-dictionary.yaml"
46
+ And it contains:
47
+ """
48
+ freak: [elf, orc
49
+ """
50
+ When I replace dictionaries with it
51
+ Then I should get a freak words dictionary
52
+ And I should not get a common words dictionary
@@ -0,0 +1,35 @@
# Step definitions for the dictionaries feature.
# @path holds the file named by the scenario and is shared between steps.

Given(/the file "(.+?)"$/) do |file|
  # Three directories up from this steps file is the project root.
  steps_dir    = File.dirname(__FILE__)
  features_dir = File.dirname(steps_dir)
  project_root = File.dirname(features_dir)
  @path = project_root + "/" + file
  File.should exist(@path)
end

Given(/it contains:/) do |content|
  # The expected snippet only has to appear somewhere in the file.
  @the_content = File.read(@path)
  @the_content.should include(content)
end

# Loading happens when the library is required, so there is nothing to do here.
When(/is loaded/) { }

When(/I load it/) { Agenda.load_dictionary(@path) }

When(/I replace dictionaries with it/) { Agenda.replace_dictionaries_with(@path) }

Then(/I should get a (.+?) words dictionary/) do |type|
  words = Agenda.dictionary[type.to_sym]
  words.should_not be_empty
end

Then(/I should get a (.+?) chars regexp/) do |type|
  pattern = Agenda.regexp[type]
  pattern.should be_a(Regexp)
end

Then(/^(.+?) dictionary should have "(.+?)"$/) do |dictionary, word|
  words = Agenda.dictionary[dictionary.to_sym]
  words.should include(word)
end

Then(/I should not get a (.+?) words dictionary/) do |type|
  Agenda.dictionary.should_not have_key(type.to_sym)
end
@@ -0,0 +1,11 @@
# Step definitions checking which words end up inside (or outside) a tag.
# @the_words is the tagged word list produced by an earlier "When" step.

Then(/in :(.+?) I should get "(.+?)"$/) do |where, what|
  tagged = @the_words.tag(where.to_sym)
  tagged.should include(Agenda::Word.new(what))
end

Then(/in non :(.+?) I should not get "(.+?)"$/) do |where, what|
  untagged = @the_words.not_tag(where.to_sym)
  untagged.should_not include(Agenda::Word.new(what))
end

Then(/in non :(.+?) I should get "(.+?)"$/) do |where, what|
  untagged = @the_words.not_tag(where.to_sym)
  untagged.should include(Agenda::Word.new(what))
end
@@ -0,0 +1,7 @@
# Step definitions for tagging the words of @the_text.

When(/I ask for word tagging/) { @the_words = @the_text.agenda }

Then(/I should get :(.+?) in "(.+?)"$/) do |tag, word|
  found = @the_words.get(word)
  found.should have_tag(tag.to_sym)
end
@@ -0,0 +1,21 @@
# Step definitions for the word-count feature.
# @the_text is the input under analysis; @the_words is the analysis result.

Given(/^the text:$/) { |string| @the_text = string }

Given(/the text "(.+)"$/) { |string| @the_text = string }

When(/^I start word counting$/) { @the_words = @the_text.agenda }

Then(/I should get (\d+) in "([^"]+)"/) do |number, text|
  expected = number.to_i
  @the_words.get(text).count.should == expected
end

Then(/^I should get:$/) do |string|
  # Still marked pending: the rendered listing is not expected to match yet.
  pending do
    rendered = @the_words.join("\n")
    rendered.should == string
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH << File.expand_path("../../../lib", __FILE__)
2
+ require "agenda"
@@ -0,0 +1,18 @@
1
+ Feature: Tagged result
2
+ In order to get an easily readable tagged result
3
+ As a user
4
+ I want to get tagged results
5
+
6
+ Scenario: Proactive tagging
7
+ Given the text "Take action and shape the future."
8
+ When I ask for word tagging
9
+ Then in :proactive I should get "action"
10
+ And in :verb I should get "shape"
11
+ And in :proactive I should get "future"
12
+
13
+ Scenario: Non-common words
14
+ Given the text "The task was easy and at hand."
15
+ When I ask for word tagging
16
+ Then in non :common I should not get "the"
17
+ And in non :common I should get "task"
18
+ And in non :common I should not get "and"
@@ -0,0 +1,20 @@
1
+ Feature: Word categorization within text
2
+ In order to distinguish tone of discourse
3
+ As a text analyst
4
+ I want to be able to tag words
5
+
6
+ Scenario: Loaded word
7
+ Given the text "The plane was a disgrace."
8
+ When I ask for word tagging
9
+ Then I should get 1 in "disgrace"
10
+ And I should get :negative in "disgrace"
11
+ And I should get :neutral in "plane"
12
+
13
+ Scenario: Type tagging
14
+ Given the text "The plane was a disgrace."
15
+ When I ask for word tagging
16
+ And I should get :article in "the"
17
+ And I should get :verb in "was"
18
+ And I should get :article in "a"
19
+ And I should get :noun in "plane"
20
+ And I should get :noun in "disgrace"
@@ -0,0 +1,78 @@
1
+ Feature: Count words from a text
2
+ In order to detect word saturation
3
+ As an amateur writer
4
+ I want to get an automated count of my words
5
+
6
+ Scenario: Simple word count
7
+ Given the text "I want to know to be"
8
+ When I start word counting
9
+ Then I should get 1 in "want"
10
+ And I should get 2 in "to"
11
+
12
+ Scenario: Discarding dots
13
+ Given the text "to be to."
14
+ When I start word counting
15
+ Then I should get 2 in "to"
16
+
17
+ Scenario: Discarding parenthesis, commas and slashes
18
+ Given the text "The (main) extra/thing, here, main."
19
+ When I start word counting
20
+ Then I should get 1 in "the"
21
+ And I should get 2 in "main"
22
+ And I should get 1 in "extra"
23
+ And I should get 1 in "thing"
24
+ And I should get 1 in "here"
25
+
26
+ Scenario: Discarding brackets, any bracket, and slashes
27
+ Given the text "{the building} [is neat] /truly\"
28
+ When I start word counting
29
+ Then I should get 1 in "the"
30
+ And I should get 1 in "building"
31
+ And I should get 1 in "is"
32
+ And I should get 1 in "neat"
33
+ And I should get 1 in "truly"
34
+
35
+ Scenario: Discarding asterisks, apostrophes and quotes
36
+ Given the text "*in* my' quotes""
37
+ When I start word counting
38
+ Then I should get 1 in "in"
39
+ And I should get 1 in "my"
40
+ And I should get 1 in "quotes"
41
+
42
+ Scenario: Discarding exclamation marks and interrogation marks
43
+ Given the text "¡Yes!, ¿What?"
44
+ When I start word counting
45
+ Then I should get 1 in "yes"
46
+ And I should get 1 in "what"
47
+
48
+ Scenario: Discarding special quotes
49
+ Given the text "“Special quotes”"
50
+ When I start word counting
51
+ Then I should get 1 in "special"
52
+ And I should get 1 in "quotes"
53
+
54
+ Scenario: Everything downcase
55
+ Given the text "To be to"
56
+ When I start word counting
57
+ Then I should get 2 in "to"
58
+ And I should get 2 in "To"
59
+ And I should get 2 in "tO"
60
+ And I should get 2 in "TO"
61
+
62
+ Scenario: Render in order a set of words
63
+ Given the text:
64
+ """
65
+ I use a given word a lot of times. A lot.
66
+ """
67
+ When I start word counting
68
+ Then I should get:
69
+ """
70
+ a: 3
71
+ lot: 2
72
+ i: 1
73
+ use: 1
74
+ given: 1
75
+ word: 1
76
+ of: 1
77
+ times: 1
78
+ """
@@ -0,0 +1,40 @@
1
+ require "yaml"
2
+
# Top-level namespace. Holds the word dictionaries (tag name => list of words)
# and the regular expressions used by the analyzer, both read from the YAML
# files under config/ when this file is loaded.
module Agenda

  # Project root: the parent of the directory containing this file.
  base_path = File.dirname(File.dirname(File.expand_path(__FILE__)))

  # Raw default dictionary exactly as parsed from YAML (string keys).
  @@base_dictionary = YAML.load_file "#{base_path}/config/dictionary.yaml"

  # Working dictionary, keyed by symbol.
  @@dictionary = {}
  @@base_dictionary.each do |key, value|
    @@dictionary[key.to_sym] = value
  end

  # Each regexp.yaml entry is a run of characters; it is wrapped in [...] so it
  # becomes a character class.
  @@regexp = {}
  YAML.load_file("#{base_path}/config/regexp.yaml").each do |key, value|
    @@regexp[key] = Regexp.new "[#{value}]"
  end

  # Returns the current dictionaries as a Hash of Symbol => word list.
  def self.dictionary; @@dictionary; end

  # Returns the Hash of regexp name (String) => Regexp.
  def self.regexp; @@regexp; end

  # Merges the dictionaries found in the YAML file at +path+ into the current
  # ones; entries with the same name are overwritten.
  #
  # An empty YAML file is treated as "no entries" rather than raising
  # NoMethodError on the false/nil that the parser returns for it.
  #
  # NOTE(review): YAML.load_file is applied to a caller-supplied path; only use
  # this with trusted files.
  #
  # Returns the parsed YAML content.
  def self.load_dictionary(path)
    loaded = YAML.load_file(path) || {}
    loaded.each do |key, value|
      @@dictionary[key.to_sym] = value
    end
  end

  # Discards every current dictionary and uses only those in the YAML file at
  # +path+.
  #
  # The file is parsed BEFORE the current dictionaries are dropped, so a
  # missing or malformed file raises and leaves the existing dictionaries
  # untouched instead of leaving Agenda with none.
  #
  # Returns the parsed YAML content.
  def self.replace_dictionaries_with(path)
    loaded = YAML.load_file(path) || {}
    replacement = {}
    loaded.each do |key, value|
      replacement[key.to_sym] = value
    end
    @@dictionary = replacement
    loaded
  end
end
32
+
33
+ # Gem own requires
34
+ require "agenda/word"
35
+ require "agenda/wordarray"
36
+ require "agenda/analyzer"
37
+
class String
  # Runs the Agenda analyzer over this string and returns its tagged words
  # (the result of Agenda::Analyzer#tag_words).
  def agenda
    analyzer = Agenda::Analyzer.new(self)
    analyzer.tag_words
  end
end
@@ -0,0 +1,31 @@
module Agenda
  # Splits a text into words, counts how often each one occurs and tags the
  # words according to the dictionaries exposed by Agenda.dictionary.
  class Analyzer
    # the_string - the text to analyze.
    #
    # The text is copied before being re-labelled as UTF-8: force_encoding
    # modifies its receiver, which would silently change the caller's string
    # and raises on a frozen one.
    def initialize(the_string)
      @the_string = the_string.dup.force_encoding "UTF-8"
    end

    # Counts the occurrences of every word in the text.
    #
    # The text is split on Agenda.regexp["discarded"]; empty fragments
    # (produced by consecutive separators) are skipped.
    #
    # Returns a WordArray with one Word per distinct word, after order! has
    # been applied to it.
    def word_count
      words = WordArray.new

      @the_string.split(Agenda.regexp["discarded"]).each do |token|
        next if token == ""

        # WordArray#get returns false when the word is not present yet.
        entry = words.get(token)
        unless entry
          entry = Word.new(token)
          words << entry
        end
        entry.count += 1
      end

      words.order!
      words
    end

    # Counts the words and adds, to each Word's tags, the name of every
    # dictionary that lists it.
    #
    # Returns the tagged WordArray.
    def tag_words
      the_word_array = word_count
      Agenda.dictionary.each do |name, words|
        # A dictionary key with no value parses as nil; treat it as empty.
        listed = Array(words)
        the_word_array.each do |word|
          word.tags << name if listed.include?(word.word)
        end
      end
      the_word_array
    end
  end
end
@@ -0,0 +1,3 @@
1
+ module Agenda
2
+ VERSION = "0.1.2"
3
+ end
@@ -0,0 +1,31 @@
module Agenda
  # One distinct word of an analyzed text: its lower-cased text, how many times
  # it occurred, and the tags assigned to it.
  class Word
    attr_accessor :word, :count, :tags

    # the_word - the word's text; it is stored lower-cased.
    def initialize(the_word)
      @word = the_word.downcase
      @count = 0
      @tags = []
    end

    # Renders "word: count", followed by " (tag, tag)" when the word has tags.
    def to_s
      s = "#{@word}: #{@count}"
      s += " (" + @tags.join(", ") + ")" unless @tags.empty?
      s
    end

    # Returns true when +tag+ is among this word's tags.
    def has_tag?(tag)
      @tags.include? tag
    end

    # Two Words are eql? when they carry the same text, whatever their count
    # or tags. Anything that is not a Word is simply not eql? (no exception).
    def eql?(other)
      other.is_a?(Agenda::Word) && other.word == @word
    end

    # Hash code consistent with eql?, so that Hash keys, Array#uniq and similar
    # treat Words with the same text as the same element.
    def hash
      @word.hash
    end

    # A Word equals another Word with the same text, and also equals a plain
    # String identical to its (lower-cased) text.
    def ==(other)
      if other.is_a? Agenda::Word
        other.word == @word
      else
        other == @word
      end
    end
  end
end
@@ -0,0 +1,37 @@
module Agenda
  # An Array of words with lookup, ordering and tag-filtering helpers.
  # Elements are expected to respond to #word, #count and #has_tag?.
  class WordArray < Array
    # Reorders the array in place: highest count first, and words with the same
    # count in the order they already had (i.e. first appearance).
    #
    # The element's position is part of the sort key because sort_by alone does
    # not guarantee any particular order for equal keys.
    #
    # Returns self.
    def order!
      ranked = each_with_index.sort_by { |entry, position| [-entry.count, position] }
      replace(ranked.map(&:first))
    end

    # Returns true when a word whose text is +string+ (case-insensitive on the
    # argument) is in the array, false otherwise.
    def has?(string)
      get(string) ? true : false
    end

    # Returns the element whose text equals the lower-cased +string+, or false
    # when there is none.
    def get(string)
      wanted = string.downcase
      find { |one_word| one_word.word == wanted } || false
    end

    # Returns a new WordArray with the words that carry +the_tag+.
    def tag(the_tag)
      WordArray.new(select { |one_word| one_word.has_tag? the_tag })
    end

    # Returns a new WordArray with the words that do NOT carry +the_tag+.
    def not_tag(the_tag)
      WordArray.new(reject { |one_word| one_word.has_tag? the_tag })
    end
  end
end
@@ -0,0 +1,31 @@
require "spec_helper"

# Specs for building an Agenda::Analyzer and counting the words of its text.
describe Agenda::Analyzer do
  describe "#new" do
    it "should fail on no string argument" do
      building_without_text = lambda { Agenda::Analyzer.new }
      building_without_text.should raise_error(ArgumentError)
    end
  end

  describe "#word_count" do
    context "the analyzer has a non-empty string" do
      let(:the_analyzer) { Agenda::Analyzer.new "My non empty string" }

      it "should return a WordArray" do
        counted = the_analyzer.word_count
        counted.should be_a(Agenda::WordArray)
      end
    end

    context "the analyzer has an empty string" do
      let(:the_analyzer) { Agenda::Analyzer.new "" }

      it "should return an empty word array" do
        counted = the_analyzer.word_count
        counted.should be_empty
      end
    end
  end
end
@@ -0,0 +1,20 @@
require "spec_helper"

# Specs for tag lookup and equality of Agenda::Word.
describe Agenda::Word do
  context "#has_tag?" do
    it "should return true when tags includes it" do
      tagged = Agenda::Word.new("man")
      tagged.tags.push(:common)
      tagged.should have_tag(:common)
    end
  end

  context "#eql?" do
    it "should be true on same word with different count" do
      # Same text, different counts: the count must not affect equality.
      frequent = Agenda::Word.new("man")
      rare     = Agenda::Word.new("man")
      frequent.count = 3
      rare.count     = 1
      frequent.should == rare
    end
  end
end
@@ -0,0 +1 @@
1
+ require "agenda"
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: agenda
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Xavier Via
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-08 00:00:00.000000000 -03:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &21654444 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *21654444
26
+ description: Text analysis package oriented to deconstruct discourse
27
+ email:
28
+ - xavier.via.canel@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - README.rdoc
36
+ - Rakefile
37
+ - agenda.gemspec
38
+ - config/alternate-dictionary.yaml
39
+ - config/dictionary.yaml
40
+ - config/regexp.yaml
41
+ - features/dictionaries.feature
42
+ - features/steps/dictionaries.rb
43
+ - features/steps/tagged_result.rb
44
+ - features/steps/tagging.rb
45
+ - features/steps/word_count.rb
46
+ - features/support/env.rb
47
+ - features/tagged_result.feature
48
+ - features/tagging.feature
49
+ - features/word_count.feature
50
+ - lib/agenda.rb
51
+ - lib/agenda/analyzer.rb
52
+ - lib/agenda/version.rb
53
+ - lib/agenda/word.rb
54
+ - lib/agenda/wordarray.rb
55
+ - spec/agenda/analyzer_specs.rb
56
+ - spec/agenda/word_spec.rb
57
+ - spec/spec_helper.rb
58
+ has_rdoc: true
59
+ homepage: ''
60
+ licenses: []
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project: agenda
79
+ rubygems_version: 1.5.2
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: Text analysis package oriented to deconstruct discourse
83
+ test_files: []