ivanvc-dictionary 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/README.rdoc +36 -1
- data/VERSION +1 -1
- data/bin/anagram_extractor +1 -1
- data/ext/anagram_extractor_c.c +63 -0
- data/ext/extconf.rb +4 -0
- data/extras/english.txt +1 -0
- data/lib/dictionary.rb +5 -3
- data/lib/dictionary/anagram_extractor.rb +18 -9
- data/spec/anagram_extactor_spec.rb +27 -15
- metadata +6 -4
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -2,12 +2,47 @@
|
|
2
2
|
|
3
3
|
A sample of an AnagramExtractor
|
4
4
|
|
5
|
-
|
5
|
+
== Usage
|
6
6
|
|
7
7
|
To extract the anagrams of a dictionary, use:
|
8
8
|
|
9
9
|
bin/anagram_extractor [source file] [destination file]
|
10
10
|
|
11
|
+
=== To use with the C binding
|
12
|
+
|
13
|
+
First, do:
|
14
|
+
|
15
|
+
cd ext
|
16
|
+
ruby extconf.rb
|
17
|
+
make
|
18
|
+
|
19
|
+
Then, run:
|
20
|
+
|
21
|
+
bin/anagram_extractor [source file] [destination file]
|
22
|
+
|
23
|
+
== Benchmarks
|
24
|
+
|
25
|
+
+Done while playing music, browsing the web, etc.+
|
26
|
+
|
27
|
+
Using the small file:
|
28
|
+
|
29
|
+
user system total real
|
30
|
+
Ruby: 0.000000 0.000000 0.000000 ( 0.000424)
|
31
|
+
C: 0.010000 0.000000 0.010000 ( 0.000099)
|
32
|
+
|
33
|
+
Using the big file (3k lines):
|
34
|
+
|
35
|
+
user system total real
|
36
|
+
Ruby: 48.040000 0.380000 48.420000 ( 49.440496)
|
37
|
+
C: 0.180000 0.020000 0.200000 ( 0.197321)
|
38
|
+
|
39
|
+
Amazing! So the full English dictionary (236978 lines):
|
40
|
+
|
41
|
+
user system total real
|
42
|
+
C: 1876.590000 119.060000 1995.650000 (2057.623276)
|
43
|
+
|
44
|
+
So about 34 minutes for the full list of anagrams!
|
45
|
+
|
11
46
|
== Copyright
|
12
47
|
|
13
48
|
Copyright (c) 2010 Iván Valdés (@ivanvc).
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.1
|
data/bin/anagram_extractor
CHANGED
@@ -0,0 +1,63 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "version.h"
|
3
|
+
#include "string.h"
|
4
|
+
|
5
|
+
// Handle to the AnagramExtractorC module; assigned in Init_anagram_extractor_c.
VALUE rb_mAnagramExtractorC;
|
6
|
+
|
7
|
+
// Tells whether two words are anagrams of each other, ignoring ASCII letter case.
//
// rb_module      - receiver of the call (unused).
// rb_first_word  - Ruby String holding the first word.
// rb_second_word - Ruby String holding the second word.
//
// Returns Qtrue when both words have the same length, contain only the letters
// a-z / A-Z, and use every letter the same number of times; Qfalse otherwise.
VALUE rb_mAnagramExtractorC_anagrams(VALUE rb_module, VALUE rb_first_word, VALUE rb_second_word)
{
  // StringValueCStr replaces STR2CSTR, which is no longer available since Ruby 1.9.
  char *first_word = StringValueCStr(rb_first_word);
  char *second_word = StringValueCStr(rb_second_word);
  int occurrences[26] = {0}; // Zero-filled so it doesn't have trash inside of the positions of the Array.
  size_t length = strlen(first_word);
  size_t i;

  // Words of different lengths can never be anagrams.
  if(length != strlen(second_word))
  {
    return Qfalse;
  }

  // Count every letter of the first word, folding upper case onto lower case.
  for(i = 0; i < length; ++i)
  {
    if(first_word[i] >= 'a' && first_word[i] <= 'z')
    {
      occurrences[first_word[i]-'a']++;
    } else if(first_word[i] >= 'A' && first_word[i] <= 'Z')
    {
      occurrences[first_word[i]-'A']++;
    } else {
      return Qfalse;
    }
  }

  // Consume the counts with the second word; a negative count means the second
  // word uses a letter more often than the first one does.
  for(i = 0; i < length; ++i)
  {
    if(second_word[i] >= 'a' && second_word[i] <= 'z')
    {
      if(--occurrences[second_word[i]-'a'] < 0)
        return Qfalse;
    } else if(second_word[i] >= 'A' && second_word[i] <= 'Z')
    {
      // The range check must be on second_word: checking first_word here would
      // index occurrences with an unvalidated character.
      if(--occurrences[second_word[i]-'A'] < 0)
        return Qfalse;
    } else {
      return Qfalse;
    }
  }

  // Every counted letter must have been consumed.
  for(i = 0; i < 26; ++i)
    if(occurrences[i] != 0)
      return Qfalse;

  return Qtrue;
}
|
56
|
+
|
57
|
+
void Init_anagram_extractor_c()
|
58
|
+
{
|
59
|
+
VALUE rb_mDictionary = rb_const_get(rb_cObject, rb_intern("Dictionary"));
|
60
|
+
rb_mAnagramExtractorC = rb_define_module_under(rb_mDictionary, "AnagramExtractorC");
|
61
|
+
|
62
|
+
rb_define_method(rb_mAnagramExtractorC, "anagrams?", rb_mAnagramExtractorC_anagrams, 2);
|
63
|
+
}
|
data/ext/extconf.rb
ADDED
data/extras/english.txt
CHANGED
data/lib/dictionary.rb
CHANGED
@@ -5,15 +5,17 @@ module Dictionary
|
|
5
5
|
base_dir = File.expand_path(File.dirname(__FILE__) + '/dictionary') + '/'
|
6
6
|
autoload :AnagramExtractor, base_dir + 'anagram_extractor.rb'
|
7
7
|
autoload :Error, base_dir + 'error.rb'
|
8
|
+
autoload :AnagramExtractorC, base_dir + '../../ext/anagram_extractor_c.o'
|
8
9
|
|
9
10
|
# Extracts the anagrams from a file, and exports the results.
|
10
11
|
#
|
11
|
-
# @param [String, Pathname] file the location of the dictionary
|
12
|
+
# @param [String, Pathname] file the location of the dictionary.
|
12
13
|
# @param [String, Pathname] export_location the export location for the results.
|
14
|
+
# @param [true, false] in_c run the extraction in C.
|
13
15
|
# @return [String] The result of the extraction.
|
14
|
-
def self.extract_anagrams(file, export_location)
|
16
|
+
def self.extract_anagrams(file, export_location, in_c)
|
15
17
|
extractor = AnagramExtractor.new(file)
|
16
|
-
extractor.extract!
|
18
|
+
extractor.extract! in_c
|
17
19
|
extractor.export(export_location)
|
18
20
|
"Exported anagram list to #{export_location}."
|
19
21
|
rescue Error::FileNotFoundError
|
@@ -7,6 +7,8 @@ module Dictionary
|
|
7
7
|
# extractor.extract!
|
8
8
|
# extractor.export('anagrams.txt')
|
9
9
|
class AnagramExtractor
|
10
|
+
include AnagramExtractorC
|
11
|
+
|
10
12
|
# Holds the dictionary file.
|
11
13
|
attr_reader :file
|
12
14
|
# Holds the anagrams extracted.
|
@@ -32,16 +34,18 @@ module Dictionary
|
|
32
34
|
|
33
35
|
# Extracts the anagrams from the provided file.
|
34
36
|
#
|
35
|
-
# @
|
36
|
-
|
37
|
+
# @param [true, false] in_c Execute the code in C.
|
38
|
+
# @return [Array, nil] the anagram list, or nil if no dictionary.
|
39
|
+
def extract!(in_c=false)
  # Nothing to extract without a dictionary file.
  return unless @file

  reset_dictionaries
  File.readlines(@file).each do |line|
    candidate = line.strip
    # anagram_for? gives back the previously seen word that matches, or nil.
    match = anagram_for?(candidate, in_c)
    @anagrams = @anagrams + [candidate, match] if match
    @dictionary << candidate
  end
  # A word that matched several later words is kept only once.
  @anagrams = @anagrams.uniq
end
|
46
50
|
|
47
51
|
# Saves the anagram dictionary to a provided file.
|
@@ -67,14 +71,19 @@ module Dictionary
|
|
67
71
|
#
|
68
72
|
# @private
|
69
73
|
# @param [String] word the word to check
|
74
|
+
# @param [true, false] in_c execute the anagram verification in C
|
70
75
|
# @return [nil, String] nil if the dictionary is empty or, there are no anagrams for
|
71
76
|
# this word. Else, the matching word.
|
72
|
-
def anagram_for?(word)
|
73
|
-
word_letters = word.downcase.scan(/\w/).sort
|
77
|
+
def anagram_for?(word, in_c=false)
  # With the C binding the raw words are handed straight to anagrams?.
  return @dictionary.find { |test_word| anagrams?(word, test_word) } if in_c

  word_letters = word.downcase.scan(/\w/).sort
  @dictionary.find do |test_word|
    candidate_letters = test_word.downcase.scan(/\w/)
    # Compare sizes first so mismatching candidates are never sorted.
    candidate_letters.size == word_letters.size && candidate_letters.sort == word_letters
  end
end
|
80
89
|
|
@@ -30,25 +30,37 @@ describe Dictionary::AnagramExtractor do
|
|
30
30
|
|
31
31
|
describe ".extract!" do
|
32
32
|
|
33
|
-
|
34
|
-
@extractor.
|
33
|
+
it "should return nil if no file" do
|
34
|
+
@extractor.extract!.should be_nil
|
35
35
|
end
|
36
36
|
|
37
|
-
|
38
|
-
@extractor.extract!.should be_an_instance_of(Array)
|
39
|
-
end
|
37
|
+
describe "with a file" do
|
40
38
|
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
before(:each) do
|
40
|
+
@extractor.file = 'extras/english.txt'
|
41
|
+
end
|
44
42
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
43
|
+
it "should return an Array" do
|
44
|
+
@extractor.extract!.should be_an_instance_of(Array)
|
45
|
+
end
|
46
|
+
|
47
|
+
# The description now agrees with the count this example asserts.
it "should return five matches" do
  @extractor.extract!.size.should == 5
end
|
50
|
+
|
51
|
+
it "should return four matches even if words are capitalized" do
|
52
|
+
@extractor.file = 'extras/capitalized_english.txt'
|
53
|
+
@extractor.extract!.size.should == 4
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should contain mary and army as anagrams" do
|
57
|
+
@extractor.extract!.should include('mary', 'army')
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should not include army twice" do
|
61
|
+
@extractor.extract!.select { |value| value == 'army' }.size.should == 1
|
62
|
+
end
|
49
63
|
|
50
|
-
it "should contain mary and army as anagrams" do
|
51
|
-
@extractor.extract!.should include('mary', 'army')
|
52
64
|
end
|
53
65
|
|
54
66
|
end
|
@@ -86,7 +98,7 @@ describe Dictionary::AnagramExtractor do
|
|
86
98
|
@extractor.extract!
|
87
99
|
@extractor.export 'example.txt'
|
88
100
|
|
89
|
-
File.read(@location).split("\n").size.should ==
|
101
|
+
File.read(@location).split("\n").size.should == 5
|
90
102
|
end
|
91
103
|
|
92
104
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- "Iv\xC3\xA1n Vald\xC3\xA9s (@ivanvc)"
|
@@ -35,8 +35,8 @@ description: Dictionary
|
|
35
35
|
email: iv@nvald.es
|
36
36
|
executables:
|
37
37
|
- anagram_extractor
|
38
|
-
extensions:
|
39
|
-
|
38
|
+
extensions:
|
39
|
+
- ext/extconf.rb
|
40
40
|
extra_rdoc_files:
|
41
41
|
- README.rdoc
|
42
42
|
files:
|
@@ -46,6 +46,8 @@ files:
|
|
46
46
|
- Rakefile
|
47
47
|
- VERSION
|
48
48
|
- bin/anagram_extractor
|
49
|
+
- ext/anagram_extractor_c.c
|
50
|
+
- ext/extconf.rb
|
49
51
|
- extras/3k_english.txt
|
50
52
|
- extras/capitalized_english.txt
|
51
53
|
- extras/english.txt
|