RubyGems - ivanvc-dictionary - Versions diffs - 0.0.0 → 0.0.1 - Mend

ivanvc-dictionary 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

data/.gitignore +5 -0
data/README.rdoc +36 -1
data/VERSION +1 -1
data/bin/anagram_extractor +1 -1
data/ext/anagram_extractor_c.c +63 -0
data/ext/extconf.rb +4 -0
data/extras/english.txt +1 -0
data/lib/dictionary.rb +5 -3
data/lib/dictionary/anagram_extractor.rb +18 -9
data/spec/anagram_extactor_spec.rb +27 -15
metadata +6 -4

data/.gitignore CHANGED Viewed

@@ -1,2 +1,7 @@
 .DS_Store
 doc/*
+pkg/*
+ivanvc-dictionary.gemspec
+ext/*.o
+ext/*.bundle
+ext/Makefile

data/README.rdoc CHANGED Viewed

@@ -2,12 +2,47 @@
 A sample of an AnagramExtractor
-= Usage
+== Usage
 To extract the anagrams of a dictionary, use:
  bin/anagram_extractor [source file] [destination file]
+=== To use with the C binding
+First, do:
+ cd ext
+ ruby extconf.rb
+ make
+Then, run
+ bin/anagram_extractor [source file] [destination file] c
+== Benchmarks
++Done while playing music, browsing the web, etc.+
+Using the small file:
+             user     system      total        real
+ Ruby:   0.000000   0.000000   0.000000 (  0.000424)
+ C:      0.010000   0.000000   0.010000 (  0.000099)
+Using the big file (3k lines):
+             user     system      total        real
+ Ruby:  48.040000   0.380000  48.420000 ( 49.440496)
+ C:      0.180000   0.020000   0.200000 (  0.197321)
+Amazing! So the full English dictionary (236978 lines):
+             user     system      total        real
+ C:    1876.590000 119.060000 1995.650000 (2057.623276)
+So it takes about 34 minutes to extract the full list of anagrams!
 == Copyright
 Copyright (c) 2010 Iván Valdés (@ivanvc).

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.0
1	+ 0.0.1

data/bin/anagram_extractor CHANGED Viewed

@@ -6,5 +6,5 @@ require 'dictionary'
 if ARGV.empty?
 	puts "Use anagram_extractor [source dictionary] [export dictionary location]"
 else
-  puts Dictionary.extract_anagrams(ARGV[0], ARGV[1])
+  puts Dictionary.extract_anagrams(ARGV[0], ARGV[1], ARGV[2] == 'c')
 end

data/ext/anagram_extractor_c.c ADDED Viewed

@@ -0,0 +1,63 @@
+#include "ruby.h"
+#include "version.h"
+#include "string.h"
+VALUE rb_mAnagramExtractorC;
+/*
+ * Tells whether two words are anagrams of each other, ignoring letter case.
+ *
+ * Only the ASCII letters a-z / A-Z are accepted: if either word contains
+ * any other character, the words are reported as not being anagrams.
+ *
+ * rb_module      - receiver of the call (unused).
+ * rb_first_word  - Ruby String holding the first word.
+ * rb_second_word - Ruby String holding the second word.
+ *
+ * Returns Qtrue when both words have the same length and the same letter
+ * counts, Qfalse otherwise.
+ */
+VALUE rb_mAnagramExtractorC_anagrams(VALUE rb_module, VALUE rb_first_word, VALUE rb_second_word)
+{
+	char *first_word  = STR2CSTR(rb_first_word);
+	char *second_word = STR2CSTR(rb_second_word);
+	int occurrences[26] = {0}; /* Zero-initialised so no slot starts with garbage. */
+	size_t length = strlen(first_word);
+	size_t i;
+	if(length != strlen(second_word))
+	{
+		return Qfalse;
+	}
+	/* Count every letter of the first word, folding upper case onto lower case. */
+	for(i = 0; i < length; ++i)
+	{
+		if(first_word[i] >= 'a' && first_word[i] <= 'z')
+		{
+			occurrences[first_word[i]-'a']++;
+		} else if(first_word[i] >= 'A' && first_word[i] <= 'Z')
+		{
+			occurrences[first_word[i]-'A']++;
+		} else {
+			return Qfalse;
+		}
+	}
+	/*
+	 * Consume the counts with the letters of the second word. The range
+	 * checks must look at second_word[i] itself: testing first_word[i]
+	 * here would reject valid mixed-case pairs and could index
+	 * occurrences[] out of bounds for a non-letter character.
+	 */
+	for(i = 0; i < length; ++i)
+	{
+		if(second_word[i] >= 'a' && second_word[i] <= 'z')
+		{
+			if(--occurrences[second_word[i]-'a'] < 0)
+				return Qfalse;
+		} else if(second_word[i] >= 'A' && second_word[i] <= 'Z')
+		{
+			if(--occurrences[second_word[i]-'A'] < 0)
+				return Qfalse;
+		} else {
+			return Qfalse;
+		}
+	}
+	/*
+	 * Both words have the same length and no counter dropped below zero,
+	 * so every counter is exactly zero here; no final scan is needed.
+	 */
+	return Qtrue;
+}
+void Init_anagram_extractor_c() /* Registers the AnagramExtractorC module under Dictionary together with its "anagrams?" method. */
+{
+	VALUE rb_mDictionary = rb_const_get(rb_cObject, rb_intern("Dictionary")); /* Looks up the top-level Dictionary constant; presumably it must already be defined when this runs - TODO confirm. */
+	rb_mAnagramExtractorC = rb_define_module_under(rb_mDictionary, "AnagramExtractorC"); /* Defines the module nested in Dictionary and keeps it in the file-level global. */
+	rb_define_method(rb_mAnagramExtractorC, "anagrams?", rb_mAnagramExtractorC_anagrams, 2); /* Exposes the C function as "anagrams?" taking 2 arguments. */
+}

data/ext/extconf.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'mkmf'
+dir_config "anagram_extractor_c"
+create_makefile "anagram_extractor_c"

data/extras/english.txt CHANGED Viewed

@@ -5,3 +5,4 @@ close
 difference
 mary
 orchestra
+yram

data/lib/dictionary.rb CHANGED Viewed

@@ -5,15 +5,17 @@ module Dictionary
   base_dir = File.expand_path(File.dirname(__FILE__) + '/dictionary') + '/'
   autoload :AnagramExtractor, base_dir + 'anagram_extractor.rb'
   autoload :Error, base_dir + 'error.rb'
+  autoload :AnagramExtractorC, base_dir + '../../ext/anagram_extractor_c.o'
   # Extracts the anagrams from a file, and exports the results.
   #
-  # @param [String, Pathname] file the location of the dictionary
+  # @param [String, Pathname] file the location of the dictionary.
   # @param [String, Pathname] export_location the export location for the results.
+  # @param [true, false] in_c run the extraction in C.
   # @return [String] The result of the extraction.
-  def self.extract_anagrams(file, export_location)
+  def self.extract_anagrams(file, export_location, in_c)
     extractor = AnagramExtractor.new(file)
-    extractor.extract!
+    extractor.extract! in_c
     extractor.export(export_location)
     "Exported anagram list to #{export_location}."
   rescue Error::FileNotFoundError

data/lib/dictionary/anagram_extractor.rb CHANGED Viewed

@@ -7,6 +7,8 @@ module Dictionary
   #   extractor.extract!
   #   extractor.export('anagrams.txt')
   class AnagramExtractor
+    include AnagramExtractorC
     # Holds the dictionary file.
     attr_reader :file
     # Holds the anagrams extracted.
@@ -32,16 +34,18 @@ module Dictionary
     # Extracts the anagrams from the provided file.
     #
-    # @return [Array] the anagram list.
-    def extract!
+    # @param [true, false] in_c Execute the code in C.
+    # @return [Array, nil] the anagram list, or nil if no dictionary.
+    def extract!(in_c=false)
+      return unless @file
       reset_dictionaries
       File.read(@file).each_line do |word|
         word = word.strip
-        has_an_anagram = anagram_for? word
+        has_an_anagram = anagram_for? word, in_c
         @anagrams += [word, has_an_anagram] if has_an_anagram
         @dictionary << word
       end
-      @anagrams
+      @anagrams = @anagrams.uniq
     end
     # Saves the anagram dictionary to a provided file.
@@ -67,14 +71,19 @@ module Dictionary
       #
       # @private
       # @param [String] word the word to check
+      # @param [true, false] in_c execute the anagram verification in C
       # @return [nil, String] nil if the dictionary is empty or, there are no anagrams for
       # this word. Else, the matching word.
-      def anagram_for?(word)
-        word_letters = word.downcase.scan(/\w/).sort
+      def anagram_for?(word, in_c=false)
+        word_letters = word.downcase.scan(/\w/).sort unless in_c
         @dictionary.find do |test_word|
-          test_word_letters = test_word.downcase.scan(/\w/)
-          test_word_letters.size == word_letters.size &&
-            test_word_letters.sort == word_letters
+          if in_c
+            anagrams? word, test_word
+          else
+            test_word_letters = test_word.downcase.scan(/\w/)
+            test_word_letters.size == word_letters.size &&
+              test_word_letters.sort == word_letters
+          end
         end
       end

data/spec/anagram_extactor_spec.rb CHANGED Viewed

@@ -30,25 +30,37 @@ describe Dictionary::AnagramExtractor do
   describe ".extract!" do
-    before(:each) do
-      @extractor.file = 'extras/english.txt'
+    it "should return nil if no file" do
+      @extractor.extract!.should be_nil
     end
-    it "should return an Array" do
-      @extractor.extract!.should be_an_instance_of(Array)
-    end
+    describe "with a file" do
-    it "should return four matches" do
-      @extractor.extract!.size.should == 4
-    end
+      before(:each) do
+        @extractor.file = 'extras/english.txt'
+      end
-    it "should return four matches even if words are capitalized" do
-      @extractor.file = 'extras/capitalized_english.txt'
-      @extractor.extract!.size.should == 4
-    end
+      it "should return an Array" do
+        @extractor.extract!.should be_an_instance_of(Array)
+      end
+      it "should return four matches" do
+        @extractor.extract!.size.should == 5
+      end
+      it "should return four matches even if words are capitalized" do
+        @extractor.file = 'extras/capitalized_english.txt'
+        @extractor.extract!.size.should == 4
+      end
+      it "should contain mary and army as anagrams" do
+        @extractor.extract!.should include('mary', 'army')
+      end
+      it "should not include army twice" do
+        @extractor.extract!.select { |value| value == 'army' }.size.should == 1
+      end
-    it "should contain mary and army as anagrams" do
-      @extractor.extract!.should include('mary', 'army')
     end
   end
@@ -86,7 +98,7 @@ describe Dictionary::AnagramExtractor do
       @extractor.extract!
       @extractor.export 'example.txt'
-      File.read(@location).split("\n").size.should == 4
+      File.read(@location).split("\n").size.should == 5
     end
   end

metadata CHANGED Viewed

@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 0
-  - 0
-  version: 0.0.0
+  - 1
+  version: 0.0.1
 platform: ruby
 authors:
 - "Iv\xC3\xA1n Vald\xC3\xA9s (@ivanvc)"
@@ -35,8 +35,8 @@ description: Dictionary
 email: iv@nvald.es
 executables:
 - anagram_extractor
-extensions: []
+extensions:
+- ext/extconf.rb
 extra_rdoc_files:
 - README.rdoc
 files:
@@ -46,6 +46,8 @@ files:
 - Rakefile
 - VERSION
 - bin/anagram_extractor
+- ext/anagram_extractor_c.c
+- ext/extconf.rb
 - extras/3k_english.txt
 - extras/capitalized_english.txt
 - extras/english.txt