ivanvc-dictionary 0.0.0 → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,2 +1,7 @@
1
1
  .DS_Store
2
2
  doc/*
3
+ pkg/*
4
+ ivanvc-dictionary.gemspec
5
+ ext/*.o
6
+ ext/*.bundle
7
+ ext/Makefile
data/README.rdoc CHANGED
@@ -2,12 +2,47 @@
2
2
 
3
3
  A sample of an AnagramExtractor
4
4
 
5
- = Usage
5
+ == Usage
6
6
 
7
7
  To extract the anagrams of a dictionary, use:
8
8
 
9
9
  bin/anagram_extractor [source file] [destination file]
10
10
 
11
+ === To use with the C binding
12
+
13
+ First, do:
14
+
15
+ cd ext
16
+ ruby extconf.rb
17
+ make
18
+
19
+ Then, run
20
+
21
+ bin/anagram_extractor [source file] [destination file] c
22
+
23
+ == Benchmarks
24
+
25
+ +Done while playing music, browsing the web, etc.+
26
+
27
+ Using the small file:
28
+
29
+ user system total real
30
+ Ruby: 0.000000 0.000000 0.000000 ( 0.000424)
31
+ C: 0.010000 0.000000 0.010000 ( 0.000099)
32
+
33
+ Using the big file (3k lines):
34
+
35
+ user system total real
36
+ Ruby: 48.040000 0.380000 48.420000 ( 49.440496)
37
+ C: 0.180000 0.020000 0.200000 ( 0.197321)
38
+
39
+ Amazing! So the full English dictionary (236978 lines):
40
+
41
+ user system total real
42
+ C: 1876.590000 119.060000 1995.650000 (2057.623276)
43
+
44
+ So about 34 minutes for the full list of anagrams!
45
+
11
46
  == Copyright
12
47
 
13
48
  Copyright (c) 2010 Iván Valdés (@ivanvc).
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.1
@@ -6,5 +6,5 @@ require 'dictionary'
6
6
  if ARGV.empty?
7
7
  puts "Use anagram_extractor [source dictionary] [export dictionary location]"
8
8
  else
9
- puts Dictionary.extract_anagrams(ARGV[0], ARGV[1])
9
+ puts Dictionary.extract_anagrams(ARGV[0], ARGV[1], ARGV[2] == 'c')
10
10
  end
@@ -0,0 +1,63 @@
1
+ #include "ruby.h"
2
+ #include "version.h"
3
+ #include "string.h"
4
+
5
+ VALUE rb_mAnagramExtractorC;
6
+
7
+ VALUE rb_mAnagramExtractorC_anagrams(VALUE rb_module, VALUE rb_first_word, VALUE rb_second_word)
8
+ {
9
+ char *first_word = STR2CSTR(rb_first_word);
10
+ char *second_word = STR2CSTR(rb_second_word);
11
+ int occurrences[26] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // So it doesn't have trash inside of the positions of the Array.
12
+
13
+ if(strlen(first_word) != strlen(second_word))
14
+ {
15
+ return Qfalse;
16
+ }
17
+
18
+ int length = strlen(first_word);
19
+ int i;
20
+
21
+ for(i = 0; i < length; ++i)
22
+ {
23
+ if(first_word[i] >= 'a' && first_word[i] <= 'z')
24
+ {
25
+ occurrences[first_word[i]-'a']++;
26
+ } else if(first_word[i] >= 'A' && first_word[i] <= 'Z')
27
+ {
28
+ occurrences[first_word[i]-'A']++;
29
+ } else {
30
+ return Qfalse;
31
+ }
32
+ }
33
+
34
+ for(i = 0; i < length; ++i)
35
+ {
36
+ if(second_word[i] >= 'a' && second_word[i] <= 'z')
37
+ {
38
+ if(--occurrences[second_word[i]-'a'] < 0)
39
+ return Qfalse;
40
+ } else if(first_word[i] >= 'A' && first_word[i] <= 'Z')
41
+ {
42
+ if(--occurrences[second_word[i]-'A'] < 0)
43
+ return Qfalse;
44
+ } else {
45
+ return Qfalse;
46
+ }
47
+ }
48
+
49
+ for(i = 0; i < 26; ++i)
50
+ if(occurrences[i] != 0)
51
+ return Qfalse;
52
+
53
+
54
+ return Qtrue;
55
+ }
56
+
57
+ void Init_anagram_extractor_c()
58
+ {
59
+ VALUE rb_mDictionary = rb_const_get(rb_cObject, rb_intern("Dictionary"));
60
+ rb_mAnagramExtractorC = rb_define_module_under(rb_mDictionary, "AnagramExtractorC");
61
+
62
+ rb_define_method(rb_mAnagramExtractorC, "anagrams?", rb_mAnagramExtractorC_anagrams, 2);
63
+ }
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'mkmf'
2
+
3
+ dir_config "anagram_extractor_c"
4
+ create_makefile "anagram_extractor_c"
data/extras/english.txt CHANGED
@@ -5,3 +5,4 @@ close
5
5
  difference
6
6
  mary
7
7
  orchestra
8
+ yram
data/lib/dictionary.rb CHANGED
@@ -5,15 +5,17 @@ module Dictionary
5
5
  base_dir = File.expand_path(File.dirname(__FILE__) + '/dictionary') + '/'
6
6
  autoload :AnagramExtractor, base_dir + 'anagram_extractor.rb'
7
7
  autoload :Error, base_dir + 'error.rb'
8
+ autoload :AnagramExtractorC, base_dir + '../../ext/anagram_extractor_c.o'
8
9
 
9
10
  # Extracts the anagrams from a file, and exports the results.
10
11
  #
11
- # @param [String, Pathname] file the location of the dictionary
12
+ # @param [String, Pathname] file the location of the dictionary.
12
13
  # @param [String, Pathname] export_location the export location for the results.
14
+ # @param [true, false] in_c run the extraction in C.
13
15
  # @return [String] The result of the extraction.
14
- def self.extract_anagrams(file, export_location)
16
+ def self.extract_anagrams(file, export_location, in_c)
15
17
  extractor = AnagramExtractor.new(file)
16
- extractor.extract!
18
+ extractor.extract! in_c
17
19
  extractor.export(export_location)
18
20
  "Exported anagram list to #{export_location}."
19
21
  rescue Error::FileNotFoundError
@@ -7,6 +7,8 @@ module Dictionary
7
7
  # extractor.extract!
8
8
  # extractor.export('anagrams.txt')
9
9
  class AnagramExtractor
10
+ include AnagramExtractorC
11
+
10
12
  # Holds the dictionary file.
11
13
  attr_reader :file
12
14
  # Holds the anagrams extracted.
@@ -32,16 +34,18 @@ module Dictionary
32
34
 
33
35
  # Extracts the anagrams from the provided file.
34
36
  #
35
- # @return [Array] the anagram list.
36
- def extract!
37
+ # @param [true, false] in_c Execute the code in C.
38
+ # @return [Array, nil] the anagram list, or nil if no dictionary.
39
+ def extract!(in_c=false)
40
+ return unless @file
37
41
  reset_dictionaries
38
42
  File.read(@file).each_line do |word|
39
43
  word = word.strip
40
- has_an_anagram = anagram_for? word
44
+ has_an_anagram = anagram_for? word, in_c
41
45
  @anagrams += [word, has_an_anagram] if has_an_anagram
42
46
  @dictionary << word
43
47
  end
44
- @anagrams
48
+ @anagrams = @anagrams.uniq
45
49
  end
46
50
 
47
51
  # Saves the anagram dictionary to a provided file.
@@ -67,14 +71,19 @@ module Dictionary
67
71
  #
68
72
  # @private
69
73
  # @param [String] word the word to check
74
+ # @param [true, false] in_c execute the anagram verification in C
70
75
  # @return [nil, String] nil if the dictionary is empty, or there are no anagrams for
71
76
  # this word. Else, the matching word.
72
- def anagram_for?(word)
73
- word_letters = word.downcase.scan(/\w/).sort
77
+ def anagram_for?(word, in_c=false)
78
+ word_letters = word.downcase.scan(/\w/).sort unless in_c
74
79
  @dictionary.find do |test_word|
75
- test_word_letters = test_word.downcase.scan(/\w/)
76
- test_word_letters.size == word_letters.size &&
77
- test_word_letters.sort == word_letters
80
+ if in_c
81
+ anagrams? word, test_word
82
+ else
83
+ test_word_letters = test_word.downcase.scan(/\w/)
84
+ test_word_letters.size == word_letters.size &&
85
+ test_word_letters.sort == word_letters
86
+ end
78
87
  end
79
88
  end
80
89
 
@@ -30,25 +30,37 @@ describe Dictionary::AnagramExtractor do
30
30
 
31
31
  describe ".extract!" do
32
32
 
33
- before(:each) do
34
- @extractor.file = 'extras/english.txt'
33
+ it "should return nil if no file" do
34
+ @extractor.extract!.should be_nil
35
35
  end
36
36
 
37
- it "should return an Array" do
38
- @extractor.extract!.should be_an_instance_of(Array)
39
- end
37
+ describe "with a file" do
40
38
 
41
- it "should return four matches" do
42
- @extractor.extract!.size.should == 4
43
- end
39
+ before(:each) do
40
+ @extractor.file = 'extras/english.txt'
41
+ end
44
42
 
45
- it "should return four matches even if words are capitalized" do
46
- @extractor.file = 'extras/capitalized_english.txt'
47
- @extractor.extract!.size.should == 4
48
- end
43
+ it "should return an Array" do
44
+ @extractor.extract!.should be_an_instance_of(Array)
45
+ end
46
+
47
+ it "should return five matches" do
48
+ @extractor.extract!.size.should == 5
49
+ end
50
+
51
+ it "should return four matches even if words are capitalized" do
52
+ @extractor.file = 'extras/capitalized_english.txt'
53
+ @extractor.extract!.size.should == 4
54
+ end
55
+
56
+ it "should contain mary and army as anagrams" do
57
+ @extractor.extract!.should include('mary', 'army')
58
+ end
59
+
60
+ it "should not include army twice" do
61
+ @extractor.extract!.select { |value| value == 'army' }.size.should == 1
62
+ end
49
63
 
50
- it "should contain mary and army as anagrams" do
51
- @extractor.extract!.should include('mary', 'army')
52
64
  end
53
65
 
54
66
  end
@@ -86,7 +98,7 @@ describe Dictionary::AnagramExtractor do
86
98
  @extractor.extract!
87
99
  @extractor.export 'example.txt'
88
100
 
89
- File.read(@location).split("\n").size.should == 4
101
+ File.read(@location).split("\n").size.should == 5
90
102
  end
91
103
 
92
104
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 0
9
- version: 0.0.0
8
+ - 1
9
+ version: 0.0.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - "Iv\xC3\xA1n Vald\xC3\xA9s (@ivanvc)"
@@ -35,8 +35,8 @@ description: Dictionary
35
35
  email: iv@nvald.es
36
36
  executables:
37
37
  - anagram_extractor
38
- extensions: []
39
-
38
+ extensions:
39
+ - ext/extconf.rb
40
40
  extra_rdoc_files:
41
41
  - README.rdoc
42
42
  files:
@@ -46,6 +46,8 @@ files:
46
46
  - Rakefile
47
47
  - VERSION
48
48
  - bin/anagram_extractor
49
+ - ext/anagram_extractor_c.c
50
+ - ext/extconf.rb
49
51
  - extras/3k_english.txt
50
52
  - extras/capitalized_english.txt
51
53
  - extras/english.txt