agenda 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in agenda.gemspec
4
+ gemspec
@@ -0,0 +1,7 @@
1
+ = Agenda
2
+
3
+ A set of discourse analysis tools aimed at providing fast feedback on writing quality and tone for auditors and editors.
4
+
5
+ == Current status
6
+
7
+ Agenda is currently under development; it has not been released as a gem yet.
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "agenda/version"
4
+
5
# Gem specification for the agenda gem.
#
# The file lists (files, test files, executables) are taken from whatever is
# tracked by git in the current working directory when the spec is evaluated.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "agenda"
  spec.version  = Agenda::VERSION
  spec.platform = Gem::Platform::RUBY

  # Authorship
  spec.authors  = ["Xavier Via"]
  spec.email    = ["xavier.via.canel@gmail.com"]
  spec.homepage = ""

  # Descriptions (summary and description intentionally carry the same text)
  spec.summary     = "Text analysis package oriented to deconstruct discourse"
  spec.description = "Text analysis package oriented to deconstruct discourse"

  spec.rubyforge_project = "agenda"

  spec.add_development_dependency("rspec")

  # Package contents, as reported by git.
  tracked_files = %x(git ls-files)
  tracked_tests = %x(git ls-files -- {test,spec,features}/*)
  tracked_bins  = %x(git ls-files -- bin/*)

  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  spec.executables   = tracked_bins.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
@@ -0,0 +1 @@
1
+ freak: [elf, orc, dwarf, human]
@@ -0,0 +1,7 @@
1
+ proactive: [action, future]
2
+ negative: [disgrace]
3
+ neutral: [plane]
4
+ common: [of, the, in, a, and]
5
+ article: [the, a]
6
+ verb: [was, shape]
7
+ noun: [plane, disgrace]
@@ -0,0 +1,2 @@
1
+ discarded: >
2
+ \\\. \/(),{}*'\"¿?¡!“”\s\[\]
@@ -0,0 +1,52 @@
1
+ Feature: Dictionaries
2
+ In order to list special words
3
+ As an Agenda developer
4
+ I want to load YAML dictionaries
5
+
6
+ Scenario: Discarded words regexp
7
+ Given the file "config/regexp.yaml"
8
+ And it contains:
9
+ """
10
+ discarded:
11
+ """
12
+ When is loaded
13
+ Then I should get a discarded chars regexp
14
+
15
+ Scenario: Negatively loaded words
16
+ Given the file "config/dictionary.yaml"
17
+ And it contains:
18
+ """
19
+ negative: [disgrace
20
+ """
21
+ When is loaded
22
+ Then I should get a negative words dictionary
23
+ And negative dictionary should have "disgrace"
24
+
25
+ Scenario: Common words
26
+ Given the file "config/dictionary.yaml"
27
+ And it contains:
28
+ """
29
+ common: [of, the
30
+ """
31
+ When is loaded
32
+ Then I should get a common words dictionary
33
+
34
+ Scenario: Load dictionary
35
+ Given the file "config/alternate-dictionary.yaml"
36
+ And it contains:
37
+ """
38
+ freak: [elf, orc
39
+ """
40
+ When I load it
41
+ Then I should get a freak words dictionary
42
+ And I should get a common words dictionary
43
+
44
+ Scenario: Replace dictionary
45
+ Given the file "config/alternate-dictionary.yaml"
46
+ And it contains:
47
+ """
48
+ freak: [elf, orc
49
+ """
50
+ When I replace dictionaries with it
51
+ Then I should get a freak words dictionary
52
+ And I should not get a common words dictionary
@@ -0,0 +1,35 @@
1
# Step definitions for features/dictionaries.feature.

# Resolves the quoted path against the directory three levels above this
# step file and checks that the file exists.
Given(/the file "(.+?)"$/) do |relative_path|
  project_root = File.dirname(File.dirname(File.dirname(__FILE__)))
  @path = "#{project_root}/#{relative_path}"
  File.should exist(@path)
end

# Checks that the file selected by the previous step contains the given
# snippet as a substring.
Given(/it contains:/) do |snippet|
  @the_content = File.read(@path)
  @the_content.should include(snippet)
end

# Intentionally a no-op step.
When(/is loaded/) do
end

# Merges the selected file into the current dictionaries.
When(/I load it/) do
  Agenda.load_dictionary(@path)
end

# Discards the current dictionaries and loads the selected file instead.
When(/I replace dictionaries with it/) do
  Agenda.replace_dictionaries_with(@path)
end

Then(/I should get a (.+?) words dictionary/) do |kind|
  word_list = Agenda.dictionary[kind.to_sym]
  word_list.should_not be_empty
end

# Note: regexps are looked up by String key, dictionaries by Symbol key.
Then(/I should get a (.+?) chars regexp/) do |kind|
  pattern = Agenda.regexp[kind]
  pattern.should be_a(Regexp)
end

Then(/^(.+?) dictionary should have "(.+?)"$/) do |dictionary_name, expected_word|
  word_list = Agenda.dictionary[dictionary_name.to_sym]
  word_list.should include(expected_word)
end

Then(/I should not get a (.+?) words dictionary/) do |kind|
  Agenda.dictionary.should_not have_key(kind.to_sym)
end
@@ -0,0 +1,11 @@
1
# Step definitions for features/tagged_result.feature.
# @the_words is set by an earlier "When" step (the tagged analysis result).

# The words carrying the tag must include the given word.
Then(/in :(.+?) I should get "(.+?)"$/) do |tag_name, text|
  expected = Agenda::Word.new(text)
  tagged = @the_words.tag(tag_name.to_sym)
  tagged.should include(expected)
end

# The words NOT carrying the tag must not include the given word.
Then(/in non :(.+?) I should not get "(.+?)"$/) do |tag_name, text|
  unexpected = Agenda::Word.new(text)
  untagged = @the_words.not_tag(tag_name.to_sym)
  untagged.should_not include(unexpected)
end

# The words NOT carrying the tag must include the given word.
Then(/in non :(.+?) I should get "(.+?)"$/) do |tag_name, text|
  expected = Agenda::Word.new(text)
  untagged = @the_words.not_tag(tag_name.to_sym)
  untagged.should include(expected)
end
@@ -0,0 +1,7 @@
1
# Step definitions for features/tagging.feature.

# Runs the analysis on the text stored by a previous "Given" step.
When(/I ask for word tagging/) do
  @the_words = @the_text.agenda
end

# The entry for the quoted word must carry the given tag.
Then(/I should get :(.+?) in "(.+?)"$/) do |tag_name, text|
  entry = @the_words.get(text)
  entry.should have_tag(tag_name.to_sym)
end
@@ -0,0 +1,21 @@
1
# Step definitions for features/word_count.feature.

# Text supplied as a multi-line doc string.
Given(/^the text:$/) do |doc_string|
  @the_text = doc_string
end

# Text supplied inline between double quotes.
Given(/the text "(.+)"$/) do |inline_text|
  @the_text = inline_text
end

When(/^I start word counting$/) do
  @the_words = @the_text.agenda
end

# The entry for the quoted word must have exactly the given count.
Then(/I should get (\d+) in "([^"]+)"/) do |number, text|
  entry = @the_words.get(text)
  entry.count.should == number.to_i
end

# Rendering of the whole result; still marked as pending.
Then(/^I should get:$/) do |expected_rendering|
  pending do
    rendered = @the_words.join("\n")
    rendered.should == expected_rendering
  end
end
@@ -0,0 +1,2 @@
1
+ $LOAD_PATH << File.expand_path("../../../lib", __FILE__)
2
+ require "agenda"
@@ -0,0 +1,18 @@
1
+ Feature: Tagged result
2
+ In order to get an easily readable tagged result
3
+ As a user
4
+ I want to get tagged results
5
+
6
+ Scenario: Proactive tagging
7
+ Given the text "Take action and shape the future."
8
+ When I ask for word tagging
9
+ Then in :proactive I should get "action"
10
+ And in :verb I should get "shape"
11
+ And in :proactive I should get "future"
12
+
13
+ Scenario: Non-common words
14
+ Given the text "The task was easy and at hand."
15
+ When I ask for word tagging
16
+ Then in non :common I should not get "the"
17
+ And in non :common I should get "task"
18
+ And in non :common I should not get "and"
@@ -0,0 +1,20 @@
1
+ Feature: Word categorization within text
2
+ In order to distinguish tone of discourse
3
+ As a text analyst
4
+ I want to be able to tag words
5
+
6
+ Scenario: Loaded word
7
+ Given the text "The plane was a disgrace."
8
+ When I ask for word tagging
9
+ Then I should get 1 in "disgrace"
10
+ And I should get :negative in "disgrace"
11
+ And I should get :neutral in "plane"
12
+
13
+ Scenario: Type tagging
14
+ Given the text "The plane was a disgrace."
15
+ When I ask for word tagging
16
+ And I should get :article in "the"
17
+ And I should get :verb in "was"
18
+ And I should get :article in "a"
19
+ And I should get :noun in "plane"
20
+ And I should get :noun in "disgrace"
@@ -0,0 +1,78 @@
1
+ Feature: Count words from a text
2
+ In order to detect word saturation
3
+ As an amateur writer
4
+ I want to get an automated count of my words
5
+
6
+ Scenario: Simple word count
7
+ Given the text "I want to know to be"
8
+ When I start word counting
9
+ Then I should get 1 in "want"
10
+ And I should get 2 in "to"
11
+
12
+ Scenario: Discarding dots
13
+ Given the text "to be to."
14
+ When I start word counting
15
+ Then I should get 2 in "to"
16
+
17
+ Scenario: Discarding parentheses, commas and slashes
18
+ Given the text "The (main) extra/thing, here, main."
19
+ When I start word counting
20
+ Then I should get 1 in "the"
21
+ And I should get 2 in "main"
22
+ And I should get 1 in "extra"
23
+ And I should get 1 in "thing"
24
+ And I should get 1 in "here"
25
+
26
+ Scenario: Discarding brackets, any bracket, and slashes
27
+ Given the text "{the building} [is neat] /truly\"
28
+ When I start word counting
29
+ Then I should get 1 in "the"
30
+ And I should get 1 in "building"
31
+ And I should get 1 in "is"
32
+ And I should get 1 in "neat"
33
+ And I should get 1 in "truly"
34
+
35
+ Scenario: Discarding asterisks, apostrophes and quotes
36
+ Given the text "*in* my' quotes""
37
+ When I start word counting
38
+ Then I should get 1 in "in"
39
+ And I should get 1 in "my"
40
+ And I should get 1 in "quotes"
41
+
42
+ Scenario: Discarding exclamation marks and interrogation marks
43
+ Given the text "¡Yes!, ¿What?"
44
+ When I start word counting
45
+ Then I should get 1 in "yes"
46
+ And I should get 1 in "what"
47
+
48
+ Scenario: Discarding special quotes
49
+ Given the text "“Special quotes”"
50
+ When I start word counting
51
+ Then I should get 1 in "special"
52
+ And I should get 1 in "quotes"
53
+
54
+ Scenario: Everything downcase
55
+ Given the text "To be to"
56
+ When I start word counting
57
+ Then I should get 2 in "to"
58
+ And I should get 2 in "To"
59
+ And I should get 2 in "tO"
60
+ And I should get 2 in "TO"
61
+
62
+ Scenario: Render in order a set of words
63
+ Given the text:
64
+ """
65
+ I use a given word a lot of times. A lot.
66
+ """
67
+ When I start word counting
68
+ Then I should get:
69
+ """
70
+ a: 3
71
+ lot: 2
72
+ i: 1
73
+ use: 1
74
+ given: 1
75
+ word: 1
76
+ of: 1
77
+ times: 1
78
+ """
@@ -0,0 +1,40 @@
1
+ require "yaml"
2
+
3
# Namespace and shared configuration of the Agenda library.
#
# When this file is loaded it reads two YAML files located in the "config"
# directory next to the directory containing this file:
#
# * config/dictionary.yaml - tag name => list of words; exposed through
#   Agenda.dictionary with the tag names converted to Symbols.
# * config/regexp.yaml     - name => characters; each value is wrapped in
#   "[...]" and compiled, so Agenda.regexp maps String names to
#   character-class Regexps.
module Agenda

  base_path = File.dirname(File.dirname(File.expand_path(__FILE__)))

  @@base_dictionary = YAML.load_file "#{base_path}/config/dictionary.yaml"
  @@dictionary = {}
  @@base_dictionary.each do |key, value|
    @@dictionary[key.to_sym] = value
  end

  @@regexp = YAML.load_file "#{base_path}/config/regexp.yaml"

  # Only existing keys are reassigned, which is allowed while iterating.
  @@regexp.each do |key, value|
    @@regexp[key] = Regexp.new "[#{value}]"
  end

  # Returns the current dictionaries (Symbol tag => word list).
  def self.dictionary; @@dictionary; end

  # Returns the compiled regexps (String name => Regexp).
  def self.regexp; @@regexp; end

  # Merges the dictionaries found in the YAML file at +path+ into the
  # current ones; a tag that already exists is overwritten.
  #
  # Returns the Hash loaded from the file (empty for an empty file).
  # Raises ArgumentError when the file does not contain a mapping; errors
  # raised by YAML.load_file itself (e.g. a missing file) propagate.
  def self.load_dictionary(path)
    loaded = read_dictionary_file(path)
    loaded.each do |key, value|
      @@dictionary[key.to_sym] = value
    end
  end

  # Replaces every current dictionary with the ones found in the YAML file
  # at +path+.
  #
  # The file is read and validated BEFORE the current dictionaries are
  # dropped, so a missing or invalid file leaves them untouched.
  #
  # Returns and raises the same as load_dictionary.
  def self.replace_dictionaries_with(path)
    loaded = read_dictionary_file(path)
    @@dictionary = {}
    loaded.each do |key, value|
      @@dictionary[key.to_sym] = value
    end
  end

  # Loads +path+ as YAML and makes sure the result is usable as a
  # dictionary: an empty document becomes an empty Hash, anything that is
  # not a mapping is rejected.
  def self.read_dictionary_file(path)
    loaded = YAML.load_file(path) || {}
    unless loaded.is_a?(Hash)
      raise ArgumentError, "#{path} does not contain a YAML mapping"
    end
    loaded
  end
  private_class_method :read_dictionary_file
end
32
+
33
+ # Gem own requires
34
+ require "agenda/word"
35
+ require "agenda/wordarray"
36
+ require "agenda/analyzer"
37
+
38
# Core extension giving every String direct access to the analysis.
class String
  # Builds an Agenda::Analyzer for the receiver and returns the result of
  # its #tag_words call.
  def agenda
    analyzer = Agenda::Analyzer.new(self)
    analyzer.tag_words
  end
end
@@ -0,0 +1,31 @@
1
module Agenda
  # Splits a text into words, counts them and tags them using the
  # dictionaries exposed by Agenda.dictionary.
  class Analyzer
    # the_string - the text to analyze.
    #
    # The text is copied before being marked as UTF-8, so the caller's
    # string keeps its own encoding and frozen strings are accepted.
    def initialize(the_string)
      @the_string = the_string.dup.force_encoding("UTF-8")
    end

    # Splits the text on the "discarded" regexp from Agenda.regexp and counts
    # how many times each word occurs. Words are compared downcased (Word
    # downcases its text on creation).
    #
    # Returns a WordArray holding one Word per distinct word, after calling
    # WordArray#order! on it.
    def word_count
      words = WordArray.new
      # Index of the words seen so far, so each token costs one hash lookup
      # instead of repeated scans of the whole array.
      seen = {}

      @the_string.split(Agenda.regexp["discarded"]).each do |token|
        # Consecutive separators produce empty tokens.
        next if token == ""

        key = token.downcase
        entry = seen[key]
        if entry.nil?
          entry = Word.new(token)
          seen[key] = entry
          words << entry
        end
        entry.count += 1
      end

      words.order!
      words
    end

    # Runs word_count and then, for every dictionary in Agenda.dictionary,
    # appends the dictionary name to the tags of each word it includes.
    #
    # Returns the tagged WordArray.
    def tag_words
      the_word_array = word_count
      Agenda.dictionary.each do |name, words|
        the_word_array.each do |word|
          word.tags << name if words.include?(word.word)
        end
      end
      the_word_array
    end
  end
end
@@ -0,0 +1,3 @@
1
module Agenda
  # Version of the agenda gem; read by the gemspec.
  VERSION = "0.1.2"
end
@@ -0,0 +1,31 @@
1
module Agenda
  # A single word of the analyzed text together with its number of
  # occurrences and the tags assigned to it.
  #
  # Two words are considered equal when their (downcased) text is the same,
  # whatever their counts or tags are.
  class Word
    attr_accessor :word, :count, :tags

    # the_word - the text of the word; it is stored downcased.
    #
    # The word starts with a count of 0 and without tags.
    def initialize(the_word)
      @word = the_word.downcase
      @count = 0
      @tags = []
    end

    # Returns "word: count", followed by " (tag, tag, ...)" when the word
    # has tags.
    def to_s
      s = "#{@word}: #{@count}"
      s += " (#{@tags.join(", ")})" unless @tags.empty?
      s
    end

    # Returns true when +tag+ is one of the word's tags.
    def has_tag?(tag)
      @tags.include? tag
    end

    # Strict equality used by Hash, Array#uniq and similar: true only for
    # another Word with the same text. Anything else (including a String)
    # is simply not eql instead of raising.
    def eql?(other)
      other.is_a?(Agenda::Word) && other.word == @word
    end

    # Kept consistent with eql? so that equal words land in the same hash
    # bucket.
    def hash
      @word.hash
    end

    # Loose equality: a Word is compared by its text, any other object
    # (typically a String) is compared directly against the text.
    def ==(other)
      if other.is_a? Agenda::Word
        other.word == @word
      else
        other == @word
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Agenda
  # Array of word entries with lookup and filtering helpers.
  #
  # Entries are expected to respond to #word, #count and #has_tag?
  # (Agenda::Word does).
  class WordArray < Array
    # Sorts the array in place by count, highest first. Entries with the
    # same count keep their current relative order, so the result is
    # deterministic. An entry without a count is treated as 0.
    #
    # Returns self.
    def order!
      sort_by!.with_index { |entry, position| [-(entry.count || 0), position] }
      self
    end

    # Returns true when the array holds an entry for +string+
    # (case-insensitive), false otherwise.
    def has?(string)
      get(string) != false
    end

    # Returns the first entry whose word equals +string+ downcased, or
    # false when there is none.
    def get(string)
      wanted = string.downcase
      find { |entry| entry.word == wanted } || false
    end

    # Returns a new WordArray with the entries that carry +the_tag+.
    def tag(the_tag)
      WordArray.new(select { |entry| entry.has_tag?(the_tag) })
    end

    # Returns a new WordArray with the entries that do NOT carry +the_tag+.
    def not_tag(the_tag)
      WordArray.new(reject { |entry| entry.has_tag?(the_tag) })
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require "spec_helper"
2
+
3
# Specs for Agenda::Analyzer construction and word counting.
describe Agenda::Analyzer do
  describe "#new" do
    it "should fail on no string argument" do
      build_without_text = lambda { Agenda::Analyzer.new }
      build_without_text.should raise_error(ArgumentError)
    end
  end

  describe "#word_count" do
    context "the analyzer has a non-empty string" do
      # A fresh analyzer is built for every example.
      let(:the_analyzer) { Agenda::Analyzer.new("My non empty string") }

      it "should return a WordArray" do
        the_analyzer.word_count.should be_a(Agenda::WordArray)
      end
    end

    context "the analyzer has an empty string" do
      let(:the_analyzer) { Agenda::Analyzer.new("") }

      it "should return an empty word array" do
        the_analyzer.word_count.should be_empty
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require "spec_helper"
2
+
3
# Specs for Agenda::Word tagging and equality.
describe Agenda::Word do
  context "#has_tag?" do
    it "should return true when tags includes it" do
      the_word = Agenda::Word.new "man"
      the_word.tags << :common
      the_word.should have_tag(:common)
    end
  end

  context "#eql?" do
    it "should be true on same word with different count" do
      the_word = Agenda::Word.new "man"
      the_word.count = 3
      other_word = Agenda::Word.new "man"
      other_word.count = 1
      # Check #== as before, and also #eql?, which is what this context
      # is named after.
      the_word.should == other_word
      the_word.should eql(other_word)
    end
  end
end
@@ -0,0 +1 @@
1
+ require "agenda"
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: agenda
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Xavier Via
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-08 00:00:00.000000000 -03:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ requirement: &21654444 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *21654444
26
+ description: Text analysis package oriented to deconstruct discourse
27
+ email:
28
+ - xavier.via.canel@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - README.rdoc
36
+ - Rakefile
37
+ - agenda.gemspec
38
+ - config/alternate-dictionary.yaml
39
+ - config/dictionary.yaml
40
+ - config/regexp.yaml
41
+ - features/dictionaries.feature
42
+ - features/steps/dictionaries.rb
43
+ - features/steps/tagged_result.rb
44
+ - features/steps/tagging.rb
45
+ - features/steps/word_count.rb
46
+ - features/support/env.rb
47
+ - features/tagged_result.feature
48
+ - features/tagging.feature
49
+ - features/word_count.feature
50
+ - lib/agenda.rb
51
+ - lib/agenda/analyzer.rb
52
+ - lib/agenda/version.rb
53
+ - lib/agenda/word.rb
54
+ - lib/agenda/wordarray.rb
55
+ - spec/agenda/analyzer_specs.rb
56
+ - spec/agenda/word_spec.rb
57
+ - spec/spec_helper.rb
58
+ has_rdoc: true
59
+ homepage: ''
60
+ licenses: []
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project: agenda
79
+ rubygems_version: 1.5.2
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: Text analysis package oriented to deconstruct discourse
83
+ test_files: []