corpus-processor 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,37 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe CorpusProcessor::Processor do
4
- subject(:processor) { CorpusProcessor::Processor.new(parser, generator) }
5
-
6
- describe "#process" do
7
- subject { processor.process(corpus) }
8
-
9
- let(:corpus) { "Some corpus" }
10
- let(:processed_corpus) {
11
- <<-CORPUS
12
- Some O
13
- corpus O
14
- CORPUS
15
- }
16
- let(:tokens) {
17
- [
18
- CorpusProcessor::Token.new("Some"),
19
- CorpusProcessor::Token.new("corpus"),
20
- ]
21
- }
22
- let(:parser) { double :parser }
23
- let(:generator) { double :generator }
24
-
25
- specify {
26
- parser.should_receive(:parse)
27
- .with(corpus)
28
- .and_return(tokens)
29
-
30
- generator.should_receive(:generate)
31
- .with(tokens)
32
- .and_return(processed_corpus)
33
-
34
- subject.should == processed_corpus
35
- }
36
- end
37
- end
@@ -1,8 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe CorpusProcessor::Token do
4
- subject { CorpusProcessor::Token.new }
5
-
6
- it { should respond_to(:word) }
7
- it { should respond_to(:category) }
8
- end
@@ -1,121 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe CorpusProcessor::Tokenizer do
4
- subject(:tokenizer) { CorpusProcessor::Tokenizer.new }
5
-
6
- describe "#tokenize" do
7
- subject { tokenizer.tokenize(text, category) }
8
-
9
- let(:category) { nil }
10
-
11
- context "empty string" do
12
- let(:text) { "" }
13
-
14
- it { should == [] }
15
- end
16
-
17
- context "one word" do
18
- let(:text) { "banana" }
19
-
20
- it { should == [CorpusProcessor::Token.new("banana")] }
21
- end
22
-
23
- context "two words" do
24
- let(:text) { "good banana" }
25
-
26
- it { should == [
27
- CorpusProcessor::Token.new("good"),
28
- CorpusProcessor::Token.new("banana"),
29
- ] }
30
- end
31
-
32
- context "ponctuation" do
33
- let(:text) { "good, banana" }
34
-
35
- it { should == [
36
- CorpusProcessor::Token.new("good"),
37
- CorpusProcessor::Token.new("banana"),
38
- ] }
39
- end
40
-
41
- context "default category" do
42
- let(:text) { "Google" }
43
- let(:category) { :organization }
44
-
45
- it { should == [
46
- CorpusProcessor::Token.new("Google", :organization),
47
- ] }
48
- end
49
-
50
- context "with tags" do
51
- let(:text) { "good<lalala/>, banana" }
52
-
53
- it { should == [
54
- CorpusProcessor::Token.new("good"),
55
- CorpusProcessor::Token.new("banana"),
56
- ] }
57
- end
58
- end
59
-
60
- describe "#strip_tags" do
61
- subject { tokenizer.strip_tags(text) }
62
-
63
- context "empty text" do
64
- let(:text) { "" }
65
-
66
- it { should == "" }
67
- end
68
-
69
- context "self closed tag" do
70
- let(:text) { "<br/>" }
71
-
72
- it { should == "" }
73
- end
74
-
75
- context "tag with content" do
76
- let(:text) { "<p>Some text</p>" }
77
-
78
- it { should == "Some text" }
79
- end
80
-
81
- context "content after tag" do
82
- let(:text) { "<p>Some<br/>text</p>" }
83
-
84
- it { should == "Some text" }
85
- end
86
- end
87
-
88
- describe "#join_lines" do
89
- subject { tokenizer.join_lines(text) }
90
-
91
- context "empty text" do
92
- let(:text) { "" }
93
-
94
- it { should == "" }
95
- end
96
-
97
- context "one word" do
98
- let(:text) { "banana" }
99
-
100
- it { should == "banana" }
101
- end
102
-
103
- context "two lines" do
104
- let(:text) { "banana\nquiabo" }
105
-
106
- it { should == "banana quiabo" }
107
- end
108
-
109
- context "line with empty space" do
110
- let(:text) { "banana\n \nquiabo" }
111
-
112
- it { should == "banana quiabo" }
113
- end
114
-
115
- context "leading spaces" do
116
- let(:text) { " \n banana\n \nquiabo \n" }
117
-
118
- it { should == "banana quiabo" }
119
- end
120
- end
121
- end
@@ -1,68 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe CorpusProcessor::Traverser do
4
- subject(:traverser) { CorpusProcessor::Traverser.new }
5
-
6
- describe "#traverse" do
7
- subject { traverser.traverse(text, regexp) }
8
-
9
- context "empty text" do
10
- let(:text) { "" }
11
- let(:regexp) { // }
12
-
13
- specify {
14
- expect { |mock_block|
15
- traverser.traverse(text, regexp, &mock_block)
16
- }.not_to yield_control
17
- }
18
- end
19
-
20
- context "simple text" do
21
- let(:text) { "abc" }
22
- let(:regexp) { /b/ }
23
-
24
- specify {
25
- expect { |mock_block|
26
- traverser.traverse(text, regexp, &mock_block)
27
- }.to yield_successive_args "a", text.match(regexp), "c"
28
- }
29
- end
30
-
31
- context "two matches" do
32
- let(:text) { "abcbd" }
33
- let(:regexp) { /b/ }
34
-
35
- specify {
36
- expect { |mock_block|
37
- traverser.traverse(text, regexp, &mock_block)
38
- }.to yield_successive_args "a",
39
- text.match(regexp),
40
- "c",
41
- text[2..-1].match(regexp),
42
- "d"
43
- }
44
- end
45
-
46
- context "match in beginning" do
47
- let(:text) { "bc" }
48
- let(:regexp) { /b/ }
49
-
50
- specify {
51
- expect { |mock_block|
52
- traverser.traverse(text, regexp, &mock_block)
53
- }.to yield_successive_args text.match(regexp), "c"
54
- }
55
- end
56
-
57
- context "match in ending" do
58
- let(:text) { "bc" }
59
- let(:regexp) { /c/ }
60
-
61
- specify {
62
- expect { |mock_block|
63
- traverser.traverse(text, regexp, &mock_block)
64
- }.to yield_successive_args "b", text.match(regexp)
65
- }
66
- end
67
- end
68
- end