ivanvc-dictionary 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,2 +1,7 @@
1
1
  .DS_Store
2
2
  doc/*
3
+ pkg/*
4
+ ivanvc-dictionary.gemspec
5
+ ext/*.o
6
+ ext/*.bundle
7
+ ext/Makefile
data/README.rdoc CHANGED
@@ -2,12 +2,47 @@
2
2
 
3
3
  A sample of an AnagramExtractor
4
4
 
5
- = Usage
5
+ == Usage
6
6
 
7
7
  To extract the anagrams of a dictionary, use:
8
8
 
9
9
  bin/anagram_extractor [source file] [destination file]
10
10
 
11
+ === To use with the C binding
12
+
13
+ First, do:
14
+
15
+ cd ext
16
+ ruby extconf.rb
17
+ make
18
+
19
+ Then, run:
20
+
21
+ bin/anagram_extractor [source file] [destination file] c
22
+
23
+ == Benchmarks
24
+
25
+ +Done while playing music, browsing the web, etc.+
26
+
27
+ Using the small file:
28
+
29
+ user system total real
30
+ Ruby: 0.000000 0.000000 0.000000 ( 0.000424)
31
+ C: 0.010000 0.000000 0.010000 ( 0.000099)
32
+
33
+ Using the big file (3k lines):
34
+
35
+ user system total real
36
+ Ruby: 48.040000 0.380000 48.420000 ( 49.440496)
37
+ C: 0.180000 0.020000 0.200000 ( 0.197321)
38
+
39
+ Amazing! So the full English dictionary (236978 lines):
40
+
41
+ user system total real
42
+ C: 1876.590000 119.060000 1995.650000 (2057.623276)
43
+
44
+ So about 34 minutes for the full list of anagrams!
45
+
11
46
  == Copyright
12
47
 
13
48
  Copyright (c) 2010 Iván Valdés (@ivanvc).
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.1
@@ -6,5 +6,5 @@ require 'dictionary'
6
6
  if ARGV.empty?
7
7
  puts "Use anagram_extractor [source dictionary] [export dictionary location]"
8
8
  else
9
- puts Dictionary.extract_anagrams(ARGV[0], ARGV[1])
9
+ puts Dictionary.extract_anagrams(ARGV[0], ARGV[1], ARGV[2] == 'c')
10
10
  end
@@ -0,0 +1,63 @@
1
+ #include "ruby.h"
2
+ #include "version.h"
3
+ #include "string.h"
4
+
5
+ VALUE rb_mAnagramExtractorC;
6
+ /* anagrams?(first, second): Qtrue when both words contain only ASCII letters and are case-insensitive anagrams of each other, Qfalse otherwise. */
7
+ VALUE rb_mAnagramExtractorC_anagrams(VALUE rb_module, VALUE rb_first_word, VALUE rb_second_word)
8
+ {
9
+ char *first_word = StringValueCStr(rb_first_word); // StringValueCStr replaces STR2CSTR, which was removed in Ruby 1.9.
10
+ char *second_word = StringValueCStr(rb_second_word);
11
+ int occurrences[26] = {0}; // Per-letter counters, all zero-initialized (index 0 is 'a'/'A').
12
+ /* Words of different lengths can never be anagrams. */
13
+ if(strlen(first_word) != strlen(second_word))
14
+ {
15
+ return Qfalse;
16
+ }
17
+
18
+ int length = strlen(first_word);
19
+ int i;
20
+ /* Count every letter of the first word, ignoring case; any non-letter disqualifies the pair. */
21
+ for(i = 0; i < length; ++i)
22
+ {
23
+ if(first_word[i] >= 'a' && first_word[i] <= 'z')
24
+ {
25
+ occurrences[first_word[i]-'a']++;
26
+ } else if(first_word[i] >= 'A' && first_word[i] <= 'Z')
27
+ {
28
+ occurrences[first_word[i]-'A']++;
29
+ } else {
30
+ return Qfalse;
31
+ }
32
+ }
33
+ /* Consume the counts with the second word; a negative count means it has a letter more often than the first word. */
34
+ for(i = 0; i < length; ++i)
35
+ {
36
+ if(second_word[i] >= 'a' && second_word[i] <= 'z')
37
+ {
38
+ if(--occurrences[second_word[i]-'a'] < 0)
39
+ return Qfalse;
40
+ } else if(second_word[i] >= 'A' && second_word[i] <= 'Z') // Must test second_word here: testing first_word rejected mixed-case pairs and could index out of bounds.
41
+ {
42
+ if(--occurrences[second_word[i]-'A'] < 0)
43
+ return Qfalse;
44
+ } else {
45
+ return Qfalse;
46
+ }
47
+ }
48
+ /* Every counter must be back at zero for the words to match. */
49
+ for(i = 0; i < 26; ++i)
50
+ if(occurrences[i] != 0)
51
+ return Qfalse;
52
+
53
+
54
+ return Qtrue;
55
+ }
56
+ /* Init hook for the anagram_extractor_c extension: defines Dictionary::AnagramExtractorC and its anagrams? method. */
57
+ void Init_anagram_extractor_c()
58
+ {
59
+ VALUE rb_mDictionary = rb_const_get(rb_cObject, rb_intern("Dictionary"));
60
+ rb_mAnagramExtractorC = rb_define_module_under(rb_mDictionary, "AnagramExtractorC");
61
+ /* The Dictionary constant is looked up on Object above; anagrams? is registered with two arguments (the words to compare). */
62
+ rb_define_method(rb_mAnagramExtractorC, "anagrams?", rb_mAnagramExtractorC_anagrams, 2);
63
+ }
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'mkmf'
2
+
3
+ dir_config "anagram_extractor_c"
4
+ create_makefile "anagram_extractor_c"
data/extras/english.txt CHANGED
@@ -5,3 +5,4 @@ close
5
5
  difference
6
6
  mary
7
7
  orchestra
8
+ yram
data/lib/dictionary.rb CHANGED
@@ -5,15 +5,17 @@ module Dictionary
5
5
  base_dir = File.expand_path(File.dirname(__FILE__) + '/dictionary') + '/'
6
6
  autoload :AnagramExtractor, base_dir + 'anagram_extractor.rb'
7
7
  autoload :Error, base_dir + 'error.rb'
8
+ autoload :AnagramExtractorC, base_dir + '../../ext/anagram_extractor_c.o'
8
9
 
9
10
  # Extracts the anagrams from a file, and exports the results.
10
11
  #
11
- # @param [String, Pathname] file the location of the dictionary
12
+ # @param [String, Pathname] file the location of the dictionary.
12
13
  # @param [String, Pathname] export_location the export location for the results.
14
+ # @param [true, false] in_c run the extraction in C.
13
15
  # @return [String] The result of the extraction.
14
- def self.extract_anagrams(file, export_location)
16
+ def self.extract_anagrams(file, export_location, in_c)
15
17
  extractor = AnagramExtractor.new(file)
16
- extractor.extract!
18
+ extractor.extract! in_c
17
19
  extractor.export(export_location)
18
20
  "Exported anagram list to #{export_location}."
19
21
  rescue Error::FileNotFoundError
@@ -7,6 +7,8 @@ module Dictionary
7
7
  # extractor.extract!
8
8
  # extractor.export('anagrams.txt')
9
9
  class AnagramExtractor
10
+ include AnagramExtractorC
11
+
10
12
  # Holds the dictionary file.
11
13
  attr_reader :file
12
14
  # Holds the anagrams extracted.
@@ -32,16 +34,18 @@ module Dictionary
32
34
 
33
35
  # Extracts the anagrams from the provided file.
34
36
  #
35
- # @return [Array] the anagram list.
36
- def extract!
37
+ # @param [true, false] in_c Execute the code in C.
38
+ # @return [Array, nil] the anagram list, or nil if no dictionary.
39
+ def extract!(in_c=false)
40
+ return unless @file
37
41
  reset_dictionaries
38
42
  File.read(@file).each_line do |word|
39
43
  word = word.strip
40
- has_an_anagram = anagram_for? word
44
+ has_an_anagram = anagram_for? word, in_c
41
45
  @anagrams += [word, has_an_anagram] if has_an_anagram
42
46
  @dictionary << word
43
47
  end
44
- @anagrams
48
+ @anagrams = @anagrams.uniq
45
49
  end
46
50
 
47
51
  # Saves the anagram dictionary to a provided file.
@@ -67,14 +71,19 @@ module Dictionary
67
71
  #
68
72
  # @private
69
73
  # @param [String] word the word to check
74
+ # @param [true, false] in_c execute the anagram verification in C
70
75
  # @return [nil, String] nil if the dictionary is empty or, there are no anagrams for
71
76
  # this word. Else, the matching word.
72
- def anagram_for?(word)
73
- word_letters = word.downcase.scan(/\w/).sort
77
+ def anagram_for?(word, in_c=false)
78
+ word_letters = word.downcase.scan(/\w/).sort unless in_c
74
79
  @dictionary.find do |test_word|
75
- test_word_letters = test_word.downcase.scan(/\w/)
76
- test_word_letters.size == word_letters.size &&
77
- test_word_letters.sort == word_letters
80
+ if in_c
81
+ anagrams? word, test_word
82
+ else
83
+ test_word_letters = test_word.downcase.scan(/\w/)
84
+ test_word_letters.size == word_letters.size &&
85
+ test_word_letters.sort == word_letters
86
+ end
78
87
  end
79
88
  end
80
89
 
@@ -30,25 +30,37 @@ describe Dictionary::AnagramExtractor do
30
30
 
31
31
  describe ".extract!" do
32
32
 
33
- before(:each) do
34
- @extractor.file = 'extras/english.txt'
33
+ it "should return nil if no file" do
34
+ @extractor.extract!.should be_nil
35
35
  end
36
36
 
37
- it "should return an Array" do
38
- @extractor.extract!.should be_an_instance_of(Array)
39
- end
37
+ describe "with a file" do
40
38
 
41
- it "should return four matches" do
42
- @extractor.extract!.size.should == 4
43
- end
39
+ before(:each) do
40
+ @extractor.file = 'extras/english.txt'
41
+ end
44
42
 
45
- it "should return four matches even if words are capitalized" do
46
- @extractor.file = 'extras/capitalized_english.txt'
47
- @extractor.extract!.size.should == 4
48
- end
43
+ it "should return an Array" do
44
+ @extractor.extract!.should be_an_instance_of(Array)
45
+ end
46
+
47
+ it "should return four matches" do
48
+ @extractor.extract!.size.should == 5
49
+ end
50
+
51
+ it "should return four matches even if words are capitalized" do
52
+ @extractor.file = 'extras/capitalized_english.txt'
53
+ @extractor.extract!.size.should == 4
54
+ end
55
+
56
+ it "should contain mary and army as anagrams" do
57
+ @extractor.extract!.should include('mary', 'army')
58
+ end
59
+
60
+ it "should not include army twice" do
61
+ @extractor.extract!.select { |value| value == 'army' }.size.should == 1
62
+ end
49
63
 
50
- it "should contain mary and army as anagrams" do
51
- @extractor.extract!.should include('mary', 'army')
52
64
  end
53
65
 
54
66
  end
@@ -86,7 +98,7 @@ describe Dictionary::AnagramExtractor do
86
98
  @extractor.extract!
87
99
  @extractor.export 'example.txt'
88
100
 
89
- File.read(@location).split("\n").size.should == 4
101
+ File.read(@location).split("\n").size.should == 5
90
102
  end
91
103
 
92
104
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 0
9
- version: 0.0.0
8
+ - 1
9
+ version: 0.0.1
10
10
  platform: ruby
11
11
  authors:
12
12
  - "Iv\xC3\xA1n Vald\xC3\xA9s (@ivanvc)"
@@ -35,8 +35,8 @@ description: Dictionary
35
35
  email: iv@nvald.es
36
36
  executables:
37
37
  - anagram_extractor
38
- extensions: []
39
-
38
+ extensions:
39
+ - ext/extconf.rb
40
40
  extra_rdoc_files:
41
41
  - README.rdoc
42
42
  files:
@@ -46,6 +46,8 @@ files:
46
46
  - Rakefile
47
47
  - VERSION
48
48
  - bin/anagram_extractor
49
+ - ext/anagram_extractor_c.c
50
+ - ext/extconf.rb
49
51
  - extras/3k_english.txt
50
52
  - extras/capitalized_english.txt
51
53
  - extras/english.txt