ivanvc-dictionary 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/README.rdoc +36 -1
- data/VERSION +1 -1
- data/bin/anagram_extractor +1 -1
- data/ext/anagram_extractor_c.c +63 -0
- data/ext/extconf.rb +4 -0
- data/extras/english.txt +1 -0
- data/lib/dictionary.rb +5 -3
- data/lib/dictionary/anagram_extractor.rb +18 -9
- data/spec/anagram_extactor_spec.rb +27 -15
- metadata +6 -4
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -2,12 +2,47 @@
|
|
2
2
|
|
3
3
|
A sample of an AnagramExtractor
|
4
4
|
|
5
|
-
|
5
|
+
== Usage
|
6
6
|
|
7
7
|
To extract the anagrams of a dictionary, use:
|
8
8
|
|
9
9
|
bin/anagram_extractor [source file] [destination file]
|
10
10
|
|
11
|
+
=== To use with the C binding
|
12
|
+
|
13
|
+
First, do:
|
14
|
+
|
15
|
+
cd ext
|
16
|
+
ruby extconf.rb
|
17
|
+
make
|
18
|
+
|
19
|
+
Then, run:
|
20
|
+
|
21
|
+
bin/anagram_extractor [source file] [destination file]
|
22
|
+
|
23
|
+
== Benchmarks
|
24
|
+
|
25
|
+
+Done while playing music, browsing the web, etc.+
|
26
|
+
|
27
|
+
Using the small file:
|
28
|
+
|
29
|
+
user system total real
|
30
|
+
Ruby: 0.000000 0.000000 0.000000 ( 0.000424)
|
31
|
+
C: 0.010000 0.000000 0.010000 ( 0.000099)
|
32
|
+
|
33
|
+
Using the big file (3k lines):
|
34
|
+
|
35
|
+
user system total real
|
36
|
+
Ruby: 48.040000 0.380000 48.420000 ( 49.440496)
|
37
|
+
C: 0.180000 0.020000 0.200000 ( 0.197321)
|
38
|
+
|
39
|
+
Amazing! So the full English dictionary (236978 lines):
|
40
|
+
|
41
|
+
user system total real
|
42
|
+
C: 1876.590000 119.060000 1995.650000 (2057.623276)
|
43
|
+
|
44
|
+
So it takes about 34 minutes to extract the full list of anagrams!
|
45
|
+
|
11
46
|
== Copyright
|
12
47
|
|
13
48
|
Copyright (c) 2010 Iván Valdés (@ivanvc).
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.1
|
data/bin/anagram_extractor
CHANGED
@@ -0,0 +1,63 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "version.h"
|
3
|
+
#include "string.h"
|
4
|
+
|
5
|
+
VALUE rb_mAnagramExtractorC;
|
6
|
+
|
7
|
+
/*
 * Dictionary::AnagramExtractorC#anagrams?(first_word, second_word)
 *
 * Tells whether two words are anagrams of each other, ignoring case.
 *
 * Only the ASCII letters a-z / A-Z are accepted: as soon as either word
 * contains any other character the result is false. Words of different
 * byte length are never anagrams.
 *
 * rb_module      - the receiver of the call (unused).
 * rb_first_word  - Ruby String holding the first word.
 * rb_second_word - Ruby String holding the second word.
 *
 * Returns Qtrue when both words use exactly the same letters the same
 * number of times, Qfalse otherwise.
 */
VALUE rb_mAnagramExtractorC_anagrams(VALUE rb_module, VALUE rb_first_word, VALUE rb_second_word)
{
  /* NOTE(review): STR2CSTR is a Ruby 1.8-era macro; newer interpreters
   * expose StringValueCStr instead -- confirm the targeted Ruby version. */
  char *first_word = STR2CSTR(rb_first_word);
  char *second_word = STR2CSTR(rb_second_word);
  int occurrences[26] = {0}; /* one zero-initialised counter per letter */
  size_t length = strlen(first_word);
  size_t i;

  if(length != strlen(second_word))
  {
    return Qfalse;
  }

  /* Count every letter of the first word, folding case into one slot. */
  for(i = 0; i < length; ++i)
  {
    if(first_word[i] >= 'a' && first_word[i] <= 'z')
    {
      occurrences[first_word[i] - 'a']++;
    } else if(first_word[i] >= 'A' && first_word[i] <= 'Z')
    {
      occurrences[first_word[i] - 'A']++;
    } else {
      return Qfalse;
    }
  }

  /* Consume the counters with the second word. The case test must look at
   * second_word[i]: testing first_word[i] here would reject valid pairs
   * and could index occurrences[] out of bounds for non-letter input. */
  for(i = 0; i < length; ++i)
  {
    if(second_word[i] >= 'a' && second_word[i] <= 'z')
    {
      if(--occurrences[second_word[i] - 'a'] < 0)
        return Qfalse;
    } else if(second_word[i] >= 'A' && second_word[i] <= 'Z')
    {
      if(--occurrences[second_word[i] - 'A'] < 0)
        return Qfalse;
    } else {
      return Qfalse;
    }
  }

  /* Any counter left non-zero means the letter multisets differ. */
  for(i = 0; i < 26; ++i)
    if(occurrences[i] != 0)
      return Qfalse;

  return Qtrue;
}
|
56
|
+
|
57
|
+
void Init_anagram_extractor_c()
|
58
|
+
{
|
59
|
+
VALUE rb_mDictionary = rb_const_get(rb_cObject, rb_intern("Dictionary"));
|
60
|
+
rb_mAnagramExtractorC = rb_define_module_under(rb_mDictionary, "AnagramExtractorC");
|
61
|
+
|
62
|
+
rb_define_method(rb_mAnagramExtractorC, "anagrams?", rb_mAnagramExtractorC_anagrams, 2);
|
63
|
+
}
|
data/ext/extconf.rb
ADDED
data/extras/english.txt
CHANGED
data/lib/dictionary.rb
CHANGED
@@ -5,15 +5,17 @@ module Dictionary
|
|
5
5
|
base_dir = File.expand_path(File.dirname(__FILE__) + '/dictionary') + '/'
|
6
6
|
autoload :AnagramExtractor, base_dir + 'anagram_extractor.rb'
|
7
7
|
autoload :Error, base_dir + 'error.rb'
|
8
|
+
autoload :AnagramExtractorC, base_dir + '../../ext/anagram_extractor_c.o'
|
8
9
|
|
9
10
|
# Extracts the anagrams from a file, and exports the results.
|
10
11
|
#
|
11
|
-
# @param [String, Pathname] file the location of the dictionary
|
12
|
+
# @param [String, Pathname] file the location of the dictionary.
|
12
13
|
# @param [String, Pathname] export_location the export location for the results.
|
14
|
+
# @param [true, false] in_c run the extraction in C.
|
13
15
|
# @return [String] The result of the extraction.
|
14
|
-
def self.extract_anagrams(file, export_location)
|
16
|
+
def self.extract_anagrams(file, export_location, in_c)
|
15
17
|
extractor = AnagramExtractor.new(file)
|
16
|
-
extractor.extract!
|
18
|
+
extractor.extract! in_c
|
17
19
|
extractor.export(export_location)
|
18
20
|
"Exported anagram list to #{export_location}."
|
19
21
|
rescue Error::FileNotFoundError
|
@@ -7,6 +7,8 @@ module Dictionary
|
|
7
7
|
# extractor.extract!
|
8
8
|
# extractor.export('anagrams.txt')
|
9
9
|
class AnagramExtractor
|
10
|
+
include AnagramExtractorC
|
11
|
+
|
10
12
|
# Holds the dictionary file.
|
11
13
|
attr_reader :file
|
12
14
|
# Holds the anagrams extracted.
|
@@ -32,16 +34,18 @@ module Dictionary
|
|
32
34
|
|
33
35
|
# Extracts the anagrams from the provided file.
|
34
36
|
#
|
35
|
-
# @
|
36
|
-
|
37
|
+
# @param [true, false] in_c Execute the code in C.
|
38
|
+
# @return [Array, nil] the anagram list, or nil if no dictionary.
|
39
|
+
def extract!(in_c=false)
  # Nothing to extract without a dictionary file.
  return unless @file

  reset_dictionaries
  File.read(@file).each_line do |line|
    entry = line.strip
    match = anagram_for?(entry, in_c)
    # Record both members of the pair when an earlier word matches.
    @anagrams += [entry, match] if match
    # Later lines are compared against every word seen so far.
    @dictionary << entry
  end
  # A word matching several entries is listed only once.
  @anagrams = @anagrams.uniq
end
|
46
50
|
|
47
51
|
# Saves the anagram dictionary to a provided file.
|
@@ -67,14 +71,19 @@ module Dictionary
|
|
67
71
|
#
|
68
72
|
# @private
|
69
73
|
# @param [String] word the word to check
|
74
|
+
# @param [true, false] in_c execute the anagram verification in C
|
70
75
|
# @return [nil, String] nil if the dictionary is empty or, there are no anagrams for
|
71
76
|
# this word. Else, the matching word.
|
72
|
-
def anagram_for?(word)
|
73
|
-
word_letters = word.downcase.scan(/\w/).sort
|
77
|
+
def anagram_for?(word, in_c=false)
  # C path: delegate the pairwise comparison to the extension's anagrams?.
  return @dictionary.find { |candidate| anagrams?(word, candidate) } if in_c

  # Ruby path: two words match when their sorted, downcased word
  # characters are identical.
  sorted_letters = word.downcase.scan(/\w/).sort
  @dictionary.find do |candidate|
    letters = candidate.downcase.scan(/\w/)
    letters.size == sorted_letters.size && letters.sort == sorted_letters
  end
end
|
80
89
|
|
@@ -30,25 +30,37 @@ describe Dictionary::AnagramExtractor do
|
|
30
30
|
|
31
31
|
describe ".extract!" do
|
32
32
|
|
33
|
-
|
34
|
-
@extractor.
|
33
|
+
it "should return nil if no file" do
|
34
|
+
@extractor.extract!.should be_nil
|
35
35
|
end
|
36
36
|
|
37
|
-
|
38
|
-
@extractor.extract!.should be_an_instance_of(Array)
|
39
|
-
end
|
37
|
+
describe "with a file" do
|
40
38
|
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
before(:each) do
|
40
|
+
@extractor.file = 'extras/english.txt'
|
41
|
+
end
|
44
42
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
43
|
+
it "should return an Array" do
|
44
|
+
@extractor.extract!.should be_an_instance_of(Array)
|
45
|
+
end
|
46
|
+
|
47
|
+
# The description previously said "four" although the expectation is 5.
it "should return five matches" do
  @extractor.extract!.size.should == 5
end
|
50
|
+
|
51
|
+
it "should return four matches even if words are capitalized" do
|
52
|
+
@extractor.file = 'extras/capitalized_english.txt'
|
53
|
+
@extractor.extract!.size.should == 4
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should contain mary and army as anagrams" do
|
57
|
+
@extractor.extract!.should include('mary', 'army')
|
58
|
+
end
|
59
|
+
|
60
|
+
it "should not include army twice" do
|
61
|
+
@extractor.extract!.select { |value| value == 'army' }.size.should == 1
|
62
|
+
end
|
49
63
|
|
50
|
-
it "should contain mary and army as anagrams" do
|
51
|
-
@extractor.extract!.should include('mary', 'army')
|
52
64
|
end
|
53
65
|
|
54
66
|
end
|
@@ -86,7 +98,7 @@ describe Dictionary::AnagramExtractor do
|
|
86
98
|
@extractor.extract!
|
87
99
|
@extractor.export 'example.txt'
|
88
100
|
|
89
|
-
File.read(@location).split("\n").size.should ==
|
101
|
+
File.read(@location).split("\n").size.should == 5
|
90
102
|
end
|
91
103
|
|
92
104
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- "Iv\xC3\xA1n Vald\xC3\xA9s (@ivanvc)"
|
@@ -35,8 +35,8 @@ description: Dictionary
|
|
35
35
|
email: iv@nvald.es
|
36
36
|
executables:
|
37
37
|
- anagram_extractor
|
38
|
-
extensions:
|
39
|
-
|
38
|
+
extensions:
|
39
|
+
- ext/extconf.rb
|
40
40
|
extra_rdoc_files:
|
41
41
|
- README.rdoc
|
42
42
|
files:
|
@@ -46,6 +46,8 @@ files:
|
|
46
46
|
- Rakefile
|
47
47
|
- VERSION
|
48
48
|
- bin/anagram_extractor
|
49
|
+
- ext/anagram_extractor_c.c
|
50
|
+
- ext/extconf.rb
|
49
51
|
- extras/3k_english.txt
|
50
52
|
- extras/capitalized_english.txt
|
51
53
|
- extras/english.txt
|