RubyGems - ivanvc-dictionary - Versions diffs - 0.0.0 → 0.0.1 - Mend

ivanvc-dictionary 0.0.0 → 0.0.1

Files changed (11) hide show

data/.gitignore +5 -0
data/README.rdoc +36 -1
data/VERSION +1 -1
data/bin/anagram_extractor +1 -1
data/ext/anagram_extractor_c.c +63 -0
data/ext/extconf.rb +4 -0
data/extras/english.txt +1 -0
data/lib/dictionary.rb +5 -3
data/lib/dictionary/anagram_extractor.rb +18 -9
data/spec/anagram_extactor_spec.rb +27 -15
metadata +6 -4

data/.gitignore CHANGED Viewed

@@ -1,2 +1,7 @@
 .DS_Store
 doc/*
+pkg/*
+ivanvc-dictionary.gemspec
+ext/*.o
+ext/*.bundle
+ext/Makefile

data/README.rdoc CHANGED Viewed

@@ -2,12 +2,47 @@
 A sample of an AnagramExtractor
-= Usage
+== Usage
 To extract the anagrams of a dictionary, use:
  bin/anagram_extractor [source file] [destination file]
+=== To use with the C binding
+First, do:
+ cd ext
+ ruby extconf.rb
+ make
+Then, run the extractor with a trailing c argument to select the C implementation:
+ bin/anagram_extractor [source file] [destination file] c
+== Benchmarks
++Done while playing music, browsing the web, etc.+
+Using the small file:
+             user     system      total        real
+ Ruby:   0.000000   0.000000   0.000000 (  0.000424)
+ C:      0.010000   0.000000   0.010000 (  0.000099)
+Using the big file (3k lines):
+             user     system      total        real
+ Ruby:  48.040000   0.380000  48.420000 ( 49.440496)
+ C:      0.180000   0.020000   0.200000 (  0.197321)
+Amazing! So, using the full English dictionary (236978 lines):
+             user     system      total        real
+ C:    1876.590000 119.060000 1995.650000 (2057.623276)
+So it takes about 34 minutes to build the full list of anagrams!
 == Copyright
 Copyright (c) 2010 Iván Valdés (@ivanvc).

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.0
1	+ 0.0.1

data/bin/anagram_extractor CHANGED Viewed

@@ -6,5 +6,5 @@ require 'dictionary'
 if ARGV.empty?
 	puts "Use anagram_extractor [source dictionary] [export dictionary location]"
 else
-  puts Dictionary.extract_anagrams(ARGV[0], ARGV[1])
+  puts Dictionary.extract_anagrams(ARGV[0], ARGV[1], ARGV[2] == 'c')
 end

data/ext/anagram_extractor_c.c ADDED Viewed

@@ -0,0 +1,63 @@
+#include "ruby.h"
+#include "version.h"
+#include "string.h"
+VALUE rb_mAnagramExtractorC;
+/*
+ * Implements Dictionary::AnagramExtractorC#anagrams?(first_word, second_word).
+ *
+ * Two words are reported as anagrams when they have the same length, consist
+ * only of ASCII letters, and use each letter the same number of times
+ * (case-insensitively).
+ *
+ * rb_module      - the receiver the method was called on (unused).
+ * rb_first_word  - Ruby String holding the first word.
+ * rb_second_word - Ruby String holding the second word.
+ *
+ * Returns Qtrue if the words are anagrams of each other, Qfalse otherwise
+ * (including when either word contains a non-letter character).
+ */
+VALUE rb_mAnagramExtractorC_anagrams(VALUE rb_module, VALUE rb_first_word, VALUE rb_second_word)
+{
+	char *first_word  = STR2CSTR(rb_first_word);
+	char *second_word = STR2CSTR(rb_second_word);
+	/* Per-letter balance: incremented for first_word, decremented for second_word. */
+	int occurrences[26] = {0};
+	int length = strlen(first_word);
+	int i;
+	/* Words of different lengths can never be anagrams. */
+	if(length != (int)strlen(second_word))
+	{
+		return Qfalse;
+	}
+	/* Count every letter of the first word, folding upper case onto lower case. */
+	for(i = 0; i < length; ++i)
+	{
+		if(first_word[i] >= 'a' && first_word[i] <= 'z')
+		{
+			occurrences[first_word[i]-'a']++;
+		} else if(first_word[i] >= 'A' && first_word[i] <= 'Z')
+		{
+			occurrences[first_word[i]-'A']++;
+		} else {
+			return Qfalse;
+		}
+	}
+	/* Consume the counts with the second word; a negative balance means a surplus letter. */
+	for(i = 0; i < length; ++i)
+	{
+		if(second_word[i] >= 'a' && second_word[i] <= 'z')
+		{
+			if(--occurrences[second_word[i]-'a'] < 0)
+				return Qfalse;
+		/* The range check must be on second_word: it is the string used to
+		 * compute the index, so checking first_word here could index outside
+		 * occurrences[] and would reject valid mixed-case pairs. */
+		} else if(second_word[i] >= 'A' && second_word[i] <= 'Z')
+		{
+			if(--occurrences[second_word[i]-'A'] < 0)
+				return Qfalse;
+		} else {
+			return Qfalse;
+		}
+	}
+	/* Final sanity check: every letter count must be balanced. */
+	for(i = 0; i < 26; ++i)
+		if(occurrences[i] != 0)
+			return Qfalse;
+	return Qtrue;
+}
+/*
+ * Extension entry point, named after the "anagram_extractor_c" library.
+ * Looks up the existing top-level Dictionary constant, defines the
+ * Dictionary::AnagramExtractorC module under it and registers the
+ * two-argument anagrams? method on that module.
+ */
+void Init_anagram_extractor_c(void)
+{
+	VALUE rb_mDictionary = rb_const_get(rb_cObject, rb_intern("Dictionary"));
+	rb_mAnagramExtractorC = rb_define_module_under(rb_mDictionary, "AnagramExtractorC");
+	rb_define_method(rb_mAnagramExtractorC, "anagrams?", rb_mAnagramExtractorC_anagrams, 2);
+}

data/ext/extconf.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'mkmf' # Ruby's Makefile generator for C extensions
+dir_config "anagram_extractor_c" # allow custom include/lib directories for this extension
+create_makefile "anagram_extractor_c" # write the Makefile that builds anagram_extractor_c

data/extras/english.txt CHANGED Viewed

@@ -5,3 +5,4 @@ close
 difference
 mary
 orchestra
+yram

data/lib/dictionary.rb CHANGED Viewed

@@ -5,15 +5,17 @@ module Dictionary
   base_dir = File.expand_path(File.dirname(__FILE__) + '/dictionary') + '/'
   autoload :AnagramExtractor, base_dir + 'anagram_extractor.rb'
   autoload :Error, base_dir + 'error.rb'
+  autoload :AnagramExtractorC, base_dir + '../../ext/anagram_extractor_c.o'
   # Extracts the anagrams from a file, and exports the results.
   #
-  # @param [String, Pathname] file the location of the dictionary
+  # @param [String, Pathname] file the location of the dictionary.
   # @param [String, Pathname] export_location the export location for the results.
+  # @param [true, false] in_c run the extraction in C.
   # @return [String] The result of the extraction.
-  def self.extract_anagrams(file, export_location)
+  def self.extract_anagrams(file, export_location, in_c)
     extractor = AnagramExtractor.new(file)
-    extractor.extract!
+    extractor.extract! in_c
     extractor.export(export_location)
     "Exported anagram list to #{export_location}."
   rescue Error::FileNotFoundError

data/lib/dictionary/anagram_extractor.rb CHANGED Viewed

@@ -7,6 +7,8 @@ module Dictionary
   #   extractor.extract!
   #   extractor.export('anagrams.txt')
   class AnagramExtractor
+    include AnagramExtractorC
     # Holds the dictionary file.
     attr_reader :file
     # Holds the anagrams extracted.
@@ -32,16 +34,18 @@ module Dictionary
     # Extracts the anagrams from the provided file.
     #
-    # @return [Array] the anagram list.
-    def extract!
+    # @param [true, false] in_c Execute the code in C.
+    # @return [Array, nil] the anagram list, or nil if no dictionary.
+    def extract!(in_c=false)
+      return unless @file
       reset_dictionaries
       File.read(@file).each_line do |word|
         word = word.strip
-        has_an_anagram = anagram_for? word
+        has_an_anagram = anagram_for? word, in_c
         @anagrams += [word, has_an_anagram] if has_an_anagram
         @dictionary << word
       end
-      @anagrams
+      @anagrams = @anagrams.uniq
     end
     # Saves the anagram dictionary to a provided file.
@@ -67,14 +71,19 @@ module Dictionary
       #
       # @private
       # @param [String] word the word to check
+      # @param [true, false] in_c execute the anagram verification in C
       # @return [nil, String] nil if the dictionary is empty or, there are no anagrams for
       # this word. Else, the matching word.
-      def anagram_for?(word)
-        word_letters = word.downcase.scan(/\w/).sort
+      def anagram_for?(word, in_c=false)
+        word_letters = word.downcase.scan(/\w/).sort unless in_c
         @dictionary.find do |test_word|
-          test_word_letters = test_word.downcase.scan(/\w/)
-          test_word_letters.size == word_letters.size &&
-            test_word_letters.sort == word_letters
+          if in_c
+            anagrams? word, test_word
+          else
+            test_word_letters = test_word.downcase.scan(/\w/)
+            test_word_letters.size == word_letters.size &&
+              test_word_letters.sort == word_letters
+          end
         end
       end

data/spec/anagram_extactor_spec.rb CHANGED Viewed

@@ -30,25 +30,37 @@ describe Dictionary::AnagramExtractor do
   describe ".extract!" do
-    before(:each) do
-      @extractor.file = 'extras/english.txt'
+    it "should return nil if no file" do
+      @extractor.extract!.should be_nil
     end
-    it "should return an Array" do
-      @extractor.extract!.should be_an_instance_of(Array)
-    end
+    describe "with a file" do
-    it "should return four matches" do
-      @extractor.extract!.size.should == 4
-    end
+      before(:each) do
+        @extractor.file = 'extras/english.txt'
+      end
-    it "should return four matches even if words are capitalized" do
-      @extractor.file = 'extras/capitalized_english.txt'
-      @extractor.extract!.size.should == 4
-    end
+      it "should return an Array" do
+        @extractor.extract!.should be_an_instance_of(Array)
+      end
+      it "should return four matches" do
+        @extractor.extract!.size.should == 5
+      end
+      it "should return four matches even if words are capitalized" do
+        @extractor.file = 'extras/capitalized_english.txt'
+        @extractor.extract!.size.should == 4
+      end
+      it "should contain mary and army as anagrams" do
+        @extractor.extract!.should include('mary', 'army')
+      end
+      it "should not include army twice" do
+        @extractor.extract!.select { |value| value == 'army' }.size.should == 1
+      end
-    it "should contain mary and army as anagrams" do
-      @extractor.extract!.should include('mary', 'army')
     end
   end
@@ -86,7 +98,7 @@ describe Dictionary::AnagramExtractor do
       @extractor.extract!
       @extractor.export 'example.txt'
-      File.read(@location).split("\n").size.should == 4
+      File.read(@location).split("\n").size.should == 5
     end
   end

metadata CHANGED Viewed

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 0
-  - 0
-  version: 0.0.0
+  - 1
+  version: 0.0.1
 platform: ruby
 authors:
 - "Iv\xC3\xA1n Vald\xC3\xA9s (@ivanvc)"
@@ -35,8 +35,8 @@ description: Dictionary
 email: iv@nvald.es
 executables:
 - anagram_extractor
-extensions: []
+extensions:
+- ext/extconf.rb
 extra_rdoc_files:
 - README.rdoc
 files:
@@ -46,6 +46,8 @@ files:
 - Rakefile
 - VERSION
 - bin/anagram_extractor
+- ext/anagram_extractor_c.c
+- ext/extconf.rb
 - extras/3k_english.txt
 - extras/capitalized_english.txt
 - extras/english.txt