edge-parsley-ruby 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +26 -0
 - data/CHANGELOG +3 -0
 - data/README +32 -0
 - data/Rakefile +57 -0
 - data/VERSION +1 -0
 - data/ext/cparsley.c +140 -0
 - data/ext/extconf.rb +69 -0
 - data/lib/parsley.rb +84 -0
 - data/parsley-ruby.gemspec +58 -0
 - data/test/test_parsley.rb +116 -0
 - data/test/yelp-benchmark.rb +53 -0
 - data/test/yelp-home.html +1004 -0
 - data/test/yelp-home.let +6 -0
 - data/test/yelp.html +2329 -0
 - metadata +79 -0
 
    
        data/.gitignore
    ADDED
    
    | 
         @@ -0,0 +1,26 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            .libs/
         
     | 
| 
      
 2 
     | 
    
         
            +
            *.o
         
     | 
| 
      
 3 
     | 
    
         
            +
            *.lo
         
     | 
| 
      
 4 
     | 
    
         
            +
            dexterc
         
     | 
| 
      
 5 
     | 
    
         
            +
            dexter
         
     | 
| 
      
 6 
     | 
    
         
            +
            parsleyc
         
     | 
| 
      
 7 
     | 
    
         
            +
            parsley
         
     | 
| 
      
 8 
     | 
    
         
            +
            .deps/
         
     | 
| 
      
 9 
     | 
    
         
            +
            Makefile
         
     | 
| 
      
 10 
     | 
    
         
            +
            y.tab.c
         
     | 
| 
      
 11 
     | 
    
         
            +
            autom4te.cache/
         
     | 
| 
      
 12 
     | 
    
         
            +
            autoscan.log
         
     | 
| 
      
 13 
     | 
    
         
            +
            config.log
         
     | 
| 
      
 14 
     | 
    
         
            +
            configure.scan
         
     | 
| 
      
 15 
     | 
    
         
            +
            parser.c
         
     | 
| 
      
 16 
     | 
    
         
            +
            scanner.c
         
     | 
| 
      
 17 
     | 
    
         
            +
            libparsley.la
         
     | 
| 
      
 18 
     | 
    
         
            +
            parser.h
         
     | 
| 
      
 19 
     | 
    
         
            +
            test.log
         
     | 
| 
      
 20 
     | 
    
         
            +
            parsley*.gem
         
     | 
| 
      
 21 
     | 
    
         
            +
            ext/cparsley.bundle
         
     | 
| 
      
 22 
     | 
    
         
            +
            ext/cparsley.so
         
     | 
| 
      
 23 
     | 
    
         
            +
            ext/Makefile
         
     | 
| 
      
 24 
     | 
    
         
            +
            ext/conftest.dSYM/
         
     | 
| 
      
 25 
     | 
    
         
            +
            work
         
     | 
| 
      
 26 
     | 
    
         
            +
            ext/mkmf.log
         
     | 
    
        data/CHANGELOG
    ADDED
    
    
    
        data/README
    ADDED
    
    | 
         @@ -0,0 +1,32 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ABOUT
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Ruby bindings for Parsley.
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            INSTALLATION
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            = Get Parsley and Dependencies = 
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            Download Parsley from http://github.com/fizx/parsley/tree/master following the installation directions located at http://github.com/fizx/parsley/blob/master/INSTALL
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            = Install parsley-ruby =
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            From source:
         
     | 
| 
      
 14 
     | 
    
         
            +
              sudo rake install
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            From GitHub: DEPRECATED!
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            From GemCutter
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              Run the following if you haven't already:
         
     | 
| 
      
 21 
     | 
    
         
            +
              gem sources -a http://gemcutter.org
         
     | 
| 
      
 22 
     | 
    
         
            +
              Install the gem:
         
     | 
| 
      
 23 
     | 
    
         
            +
              sudo gem install parsley-ruby
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            PARSLETS.COM INTEGRATION
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            We also recommend installing the free online_parselets rubygem in order to use other people's parselets and to share your own:
         
     | 
| 
      
 28 
     | 
    
         
            +
              Run the following if you haven't already:
         
     | 
| 
      
 29 
     | 
    
         
            +
              gem sources -a http://gems.github.com
         
     | 
| 
      
 30 
     | 
    
         
            +
              Install the gem:
         
     | 
| 
      
 31 
     | 
    
         
            +
              sudo gem install iterationlabs-online_parslets
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
    
        data/Rakefile
    ADDED
    
    | 
         @@ -0,0 +1,57 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'rubygems'
            require 'rake'

            # Packaging tasks. Jeweler is optional: when it cannot be loaded we only
            # print a hint, so the remaining tasks in this file stay usable.
            begin
              require 'jeweler'
              Jeweler::Tasks.new do |gem|
                gem.name = "parsley-ruby"
                gem.summary = "Ruby binding for parsley"
                gem.description = "XML/HTML Parser"
                gem.email = "kyle@kylemaxwell.com"
                gem.homepage = "http://github.com/fizx/parsley-ruby"
                gem.authors = ["Kyle Maxwell"]
                gem.add_dependency("json", ["> 0.0.0"])
                gem.require_paths = ["lib", "ext"]
                gem.extensions = "ext/extconf.rb"
              end
              Jeweler::GemcutterTasks.new
            rescue LoadError
              puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
            end

            # Unit tests. The package ships its tests as test/test_*.rb
            # (e.g. test/test_parsley.rb), so the glob has to match that naming;
            # the previous '*_test.rb' glob matched no file at all.
            require 'rake/testtask'
            Rake::TestTask.new(:test) do |test|
              test.libs << 'lib' << 'test'
              test.pattern = 'test/**/test_*.rb'
              test.verbose = true
            end

            # Coverage. RCov is optional; without it the :rcov task aborts with
            # installation instructions instead of being undefined.
            begin
              require 'rcov/rcovtask'
              Rcov::RcovTask.new do |test|
                test.libs << 'test'
                test.pattern = 'test/**/test_*.rb'
                test.verbose = true
              end
            rescue LoadError
              task :rcov do
                abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
              end
            end

            # :check_dependencies is not defined anywhere in this Rakefile
            # (presumably it is provided by the Jeweler tasks above). Only depend on
            # it when it exists, otherwise `rake test` fails whenever Jeweler is absent.
            task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

            task :default => :test

            # API documentation. Prefer the task class shipped with RDoc and fall
            # back to the legacy Rake one on old installations.
            begin
              require 'rdoc/task'
              rdoc_task_class = RDoc::Task
            rescue LoadError
              require 'rake/rdoctask'
              rdoc_task_class = Rake::RDocTask
            end

            rdoc_task_class.new do |rdoc|
              # strip the trailing newline of the VERSION file so the title stays on one line
              version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

              rdoc.rdoc_dir = 'rdoc'
              rdoc.title = "parsley-ruby #{version}"
              rdoc.rdoc_files.include('README*')
              rdoc.rdoc_files.include('lib/**/*.rb')
            end
         
     | 
    
        data/VERSION
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            0.4.3
         
     | 
    
        data/ext/cparsley.c
    ADDED
    
    | 
         @@ -0,0 +1,140 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #include "ruby.h"
         
     | 
| 
      
 2 
     | 
    
         
            +
            #include <stdio.h>
         
     | 
| 
      
 3 
     | 
    
         
            +
            #include <libxslt/xslt.h>
         
     | 
| 
      
 4 
     | 
    
         
            +
            #include <libexslt/exslt.h>
         
     | 
| 
      
 5 
     | 
    
         
            +
            #include <libxslt/xsltInternals.h>
         
     | 
| 
      
 6 
     | 
    
         
            +
            #include <libxslt/transform.h>
         
     | 
| 
      
 7 
     | 
    
         
            +
            #include <libxml/parser.h>
         
     | 
| 
      
 8 
     | 
    
         
            +
            #include <libxml/HTMLparser.h>
         
     | 
| 
      
 9 
     | 
    
         
            +
            #include <libxml/HTMLtree.h>
         
     | 
| 
      
 10 
     | 
    
         
            +
            #include <libxml/xmlwriter.h>
         
     | 
| 
      
 11 
     | 
    
         
            +
            #include <parsley.h>
         
     | 
| 
      
 12 
     | 
    
         
            +
            #include <json/json.h>
         
     | 
| 
      
 13 
     | 
    
         
            +
            #include <xml2json.h>
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            VALUE _new(VALUE, VALUE, VALUE);
         
     | 
| 
      
 16 
     | 
    
         
            +
            VALUE _parse(VALUE, VALUE);
         
     | 
| 
      
 17 
     | 
    
         
            +
            VALUE _rb_set_user_agent(VALUE self, VALUE agent);
         
     | 
| 
      
 18 
     | 
    
         
            +
            VALUE c_parsley_err;
         
     | 
| 
      
 19 
     | 
    
         
            +
            VALUE c_parsley;
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            void Init_cparsley()
         
     | 
| 
      
 22 
     | 
    
         
            +
            {
         
     | 
| 
      
 23 
     | 
    
         
            +
            	c_parsley = rb_define_class("CParsley", rb_cObject);
         
     | 
| 
      
 24 
     | 
    
         
            +
            	c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
         
     | 
| 
      
 25 
     | 
    
         
            +
            	rb_define_singleton_method(c_parsley, "new", _new, 2);
         
     | 
| 
      
 26 
     | 
    
         
            +
            	rb_define_singleton_method(c_parsley, "set_user_agent", _rb_set_user_agent, 1);
         
     | 
| 
      
 27 
     | 
    
         
            +
            	rb_define_method(c_parsley, "parse", _parse, 1);
         
     | 
| 
      
 28 
     | 
    
         
            +
            }
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            VALUE 
         
     | 
| 
      
 31 
     | 
    
         
            +
            _new(VALUE self, VALUE parsley, VALUE incl){
         
     | 
| 
      
 32 
     | 
    
         
            +
            	parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
         
     | 
| 
      
 33 
     | 
    
         
            +
            	if(ptr->error != NULL) {
         
     | 
| 
      
 34 
     | 
    
         
            +
            	  rb_raise(c_parsley_err, ptr->error);
         
     | 
| 
      
 35 
     | 
    
         
            +
                parsley_free(ptr);
         
     | 
| 
      
 36 
     | 
    
         
            +
                return Qnil;
         
     | 
| 
      
 37 
     | 
    
         
            +
            	}
         
     | 
| 
      
 38 
     | 
    
         
            +
            	
         
     | 
| 
      
 39 
     | 
    
         
            +
             	return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
         
     | 
| 
      
 40 
     | 
    
         
            +
            }
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            VALUE 
         
     | 
| 
      
 43 
     | 
    
         
            +
            _rb_set_user_agent(VALUE self, VALUE agent) {
         
     | 
| 
      
 44 
     | 
    
         
            +
              parsley_set_user_agent(STR2CSTR(agent));
         
     | 
| 
      
 45 
     | 
    
         
            +
              return Qtrue;
         
     | 
| 
      
 46 
     | 
    
         
            +
            }
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
             
     | 
| 
      
 49 
     | 
    
         
            +
            static VALUE 
         
     | 
| 
      
 50 
     | 
    
         
            +
            rubify_recurse(xmlNodePtr xml) {
         
     | 
| 
      
 51 
     | 
    
         
            +
              if(xml == NULL) return NULL;
         
     | 
| 
      
 52 
     | 
    
         
            +
              xmlNodePtr child;
         
     | 
| 
      
 53 
     | 
    
         
            +
              VALUE obj = Qnil;
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
              switch(xml->type) {
         
     | 
| 
      
 56 
     | 
    
         
            +
                case XML_ELEMENT_NODE:
         
     | 
| 
      
 57 
     | 
    
         
            +
                  child = xml->children;
         
     | 
| 
      
 58 
     | 
    
         
            +
                  if(xml->ns == NULL) {
         
     | 
| 
      
 59 
     | 
    
         
            +
                    child = xml;
         
     | 
| 
      
 60 
     | 
    
         
            +
                    obj = rb_hash_new();
         
     | 
| 
      
 61 
     | 
    
         
            +
                    while(child != NULL) {
         
     | 
| 
      
 62 
     | 
    
         
            +
                      rb_hash_aset(obj, rb_str_new2(child->name), rubify_recurse(child->children));
         
     | 
| 
      
 63 
     | 
    
         
            +
                      child = child->next;
         
     | 
| 
      
 64 
     | 
    
         
            +
                    }
         
     | 
| 
      
 65 
     | 
    
         
            +
                  } else if(!strcmp(xml->ns->prefix, "parsley")) {
         
     | 
| 
      
 66 
     | 
    
         
            +
                    if(!strcmp(xml->name, "groups")) {
         
     | 
| 
      
 67 
     | 
    
         
            +
                      obj = rb_ary_new();
         
     | 
| 
      
 68 
     | 
    
         
            +
                      while(child != NULL) {
         
     | 
| 
      
 69 
     | 
    
         
            +
                        rb_ary_push(obj, rubify_recurse(child->children));
         
     | 
| 
      
 70 
     | 
    
         
            +
                        child = child->next;
         
     | 
| 
      
 71 
     | 
    
         
            +
                      }          
         
     | 
| 
      
 72 
     | 
    
         
            +
                    } else if(!strcmp(xml->name, "group")) {
         
     | 
| 
      
 73 
     | 
    
         
            +
                      // Implicitly handled by parsley:groups handler
         
     | 
| 
      
 74 
     | 
    
         
            +
                    }
         
     | 
| 
      
 75 
     | 
    
         
            +
                  }
         
     | 
| 
      
 76 
     | 
    
         
            +
                  break;
         
     | 
| 
      
 77 
     | 
    
         
            +
                case XML_TEXT_NODE:
         
     | 
| 
      
 78 
     | 
    
         
            +
                  obj = rb_str_new2(xml->content);
         
     | 
| 
      
 79 
     | 
    
         
            +
                  break;
         
     | 
| 
      
 80 
     | 
    
         
            +
              }
         
     | 
| 
      
 81 
     | 
    
         
            +
              // inspect(obj);
         
     | 
| 
      
 82 
     | 
    
         
            +
              return obj;
         
     | 
| 
      
 83 
     | 
    
         
            +
            }
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
            static VALUE 
         
     | 
| 
      
 86 
     | 
    
         
            +
            _parse_doc(parsedParsleyPtr ptr, VALUE type) {
         
     | 
| 
      
 87 
     | 
    
         
            +
            	if(ptr->error != NULL || ptr->xml == NULL) {
         
     | 
| 
      
 88 
     | 
    
         
            +
                if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
         
     | 
| 
      
 89 
     | 
    
         
            +
            		rb_raise(c_parsley_err, ptr->error);
         
     | 
| 
      
 90 
     | 
    
         
            +
                parsed_parsley_free(ptr);
         
     | 
| 
      
 91 
     | 
    
         
            +
            		return Qnil;
         
     | 
| 
      
 92 
     | 
    
         
            +
            	}
         
     | 
| 
      
 93 
     | 
    
         
            +
            	
         
     | 
| 
      
 94 
     | 
    
         
            +
            	VALUE output;
         
     | 
| 
      
 95 
     | 
    
         
            +
            	if(type == ID2SYM(rb_intern("json"))) {
         
     | 
| 
      
 96 
     | 
    
         
            +
            		struct json_object *json = xml2json(ptr->xml->children->children);
         
     | 
| 
      
 97 
     | 
    
         
            +
            		char* str = json_object_to_json_string(json);
         
     | 
| 
      
 98 
     | 
    
         
            +
            		output = rb_str_new2(str);
         
     | 
| 
      
 99 
     | 
    
         
            +
            		json_object_put(json);
         
     | 
| 
      
 100 
     | 
    
         
            +
            	} else if(type == ID2SYM(rb_intern("xml"))) {
         
     | 
| 
      
 101 
     | 
    
         
            +
            		xmlChar* str;
         
     | 
| 
      
 102 
     | 
    
         
            +
            		int size;
         
     | 
| 
      
 103 
     | 
    
         
            +
            		xmlDocDumpMemory(ptr->xml, &str, &size);
         
     | 
| 
      
 104 
     | 
    
         
            +
            		output = rb_str_new(str, size);
         
     | 
| 
      
 105 
     | 
    
         
            +
            	} else {
         
     | 
| 
      
 106 
     | 
    
         
            +
             		output = rubify_recurse(ptr->xml->children->children);
         
     | 
| 
      
 107 
     | 
    
         
            +
            		if((void*)output == NULL) output = Qnil; 
         
     | 
| 
      
 108 
     | 
    
         
            +
            	}
         
     | 
| 
      
 109 
     | 
    
         
            +
            	
         
     | 
| 
      
 110 
     | 
    
         
            +
              parsed_parsley_free(ptr);
         
     | 
| 
      
 111 
     | 
    
         
            +
              
         
     | 
| 
      
 112 
     | 
    
         
            +
            	return output;
         
     | 
| 
      
 113 
     | 
    
         
            +
            }
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
            #define OPT(A) rb_hash_aref(options, ID2SYM(rb_intern(A)))
         
     | 
| 
      
 116 
     | 
    
         
            +
            #define OPT_BOOL(A) (OPT(A) != Qnil && OPT(A) != Qfalse)
         
     | 
| 
      
 117 
     | 
    
         
            +
            #define OPT_MATCH(A, B) (rb_hash_aref(options, ID2SYM(rb_intern(A))) == ID2SYM(rb_intern(B)))
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
            /*
             * CParsley#parse(options)
             *
             * Runs the compiled parselet wrapped by `self` and returns the result
             * converted by _parse_doc.
             *
             * `options` must be a Hash (TypeError otherwise). Symbol keys read here:
             *   :input       - :html sets PARSLEY_OPTIONS_HTML
             *   :prune, :collate, :allow_net, :allow_local, :sgwrap
             *                - truthy values set the matching PARSLEY_OPTIONS_* flag
             *   :has_base    - when truthy, :base (String) is passed as base
             *   :is_file     - when truthy, :file (String) goes to parsley_parse_file;
             *                  otherwise :string (String) goes to parsley_parse_string
             *   :output      - forwarded to _parse_doc as the output type
             *
             * The base is only passed to parsley_parse_string. String options are
             * converted with StringValueCStr (STR2CSTR is gone after Ruby 1.8), which
             * raises TypeError for non-Strings and ArgumentError for embedded NUL.
             */
            VALUE _parse(VALUE self, VALUE options){
              parsleyPtr parsley;
              int flags = 0;
              char *base = NULL;
              char *str;
              VALUE base_value;
              VALUE source;
              VALUE output_type;

              Check_Type(options, T_HASH);

              /* The wrapped data pointer is the parsleyPtr itself (see _new), so read
               * it directly; Data_Get_Struct would cast it to parsleyPtr* instead. */
              Check_Type(self, T_DATA);
              parsley = (parsleyPtr)DATA_PTR(self);

              if(OPT_MATCH("input", "html"))    flags |= PARSLEY_OPTIONS_HTML;
              if(OPT_BOOL("prune"))             flags |= PARSLEY_OPTIONS_PRUNE;
              if(OPT_BOOL("collate"))           flags |= PARSLEY_OPTIONS_COLLATE;
              if(OPT_BOOL("allow_net"))         flags |= PARSLEY_OPTIONS_ALLOW_NET;
              if(OPT_BOOL("allow_local"))       flags |= PARSLEY_OPTIONS_ALLOW_LOCAL;
              if(OPT_BOOL("sgwrap"))            flags |= PARSLEY_OPTIONS_SGWRAP;

              /* StringValueCStr needs an lvalue, hence the named temporaries; they
               * also keep the strings referenced while the C pointers are in use. */
              if(OPT_BOOL("has_base")) {
                base_value = OPT("base");
                base = StringValueCStr(base_value);
              }

              output_type = OPT("output");

              if(OPT_BOOL("is_file")) {
                source = OPT("file");
                return _parse_doc(parsley_parse_file(parsley, StringValueCStr(source), flags), output_type);
              }

              source = OPT("string");
              str = StringValueCStr(source);
              return _parse_doc(parsley_parse_string(parsley, str, strlen(str), base, flags), output_type);
            }
         
     | 
    
        data/ext/extconf.rb
    ADDED
    
    | 
         @@ -0,0 +1,69 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
            ENV["ARCHFLAGS"] ||= "-arch #{`uname -p` =~ /powerpc/ ? 'ppc' : 'i386'}"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            require 'mkmf'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
         
     | 
| 
      
 7 
     | 
    
         
            +
            LIBDIR = Config::CONFIG['libdir']
         
     | 
| 
      
 8 
     | 
    
         
            +
            INCLUDEDIR = Config::CONFIG['includedir']
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            $CFLAGS << " #{ENV["CFLAGS"]}"
         
     | 
| 
      
 11 
     | 
    
         
            +
            # Build configuration for the cparsley extension: sets compiler flags, probes
            # for libxml2 / libxslt / json-c / parsley, and writes the Makefile.
            # ROOT, LIBDIR and INCLUDEDIR are defined earlier in this script.

            # `Config` is the legacy name of `RbConfig` and is not defined on newer
            # Rubies, so prefer `RbConfig` and only fall back to `Config` when needed.
            ruby_config = defined?(RbConfig) ? RbConfig::CONFIG : Config::CONFIG

            # True when the build targets MinGW; the bundled `cross/` trees are used then.
            windows = ruby_config['target_os'] == 'mingw32'

            if windows
              $CFLAGS << " -DXP_WIN -DXP_WIN32"
            else
              $CFLAGS << " -g -DXP_UNIX"
            end

            $CFLAGS << " -O3 -Wall -Wextra -Wcast-qual -Wwrite-strings -Wconversion -Wmissing-noreturn -Winline"

            # Library probes. Their results are deliberately not checked here, exactly
            # as in the original script (only the header probes below abort).
            if windows
              find_library('xml2', 'xmlParseDoc',
                           File.join(ROOT, 'cross', 'libxml2-2.7.2.win32', 'bin'))
              find_library('xslt', 'xsltParseStylesheetDoc',
                           File.join(ROOT, 'cross', 'libxslt-1.1.24.win32', 'bin'))
            else
              find_library('xml2', 'xmlParseDoc', LIBDIR)
              find_library('xslt', 'xsltParseStylesheetDoc', LIBDIR)
            end

            # Header probes: abort the build when a required header is missing.
            if windows
              header = File.join(ROOT, 'cross', 'libxml2-2.7.2.win32', 'include')
              unless find_header('libxml/xmlversion.h', header)
                abort "need libxml"
              end

              header = File.join(ROOT, 'cross', 'libxslt-1.1.24.win32', 'include')
              unless find_header('libxslt/libxslt.h', header)
                abort "need libxslt"
              end

              header = File.join(ROOT, 'cross', 'iconv-1.9.2.win32', 'include')
              unless find_header('iconv.h', header)
                abort "need iconv"
              end
            else
              unless find_header('libxml/xmlversion.h',
                                 File.join(INCLUDEDIR, "libxml2"), '/usr/include/libxml2'
                                )
                abort "need libxml"
              end
              unless find_header('libxslt/xslt.h', INCLUDEDIR, '/usr/include')
                abort "need libxslt"
              end

              # NOTE(review): `version` is never read in the visible part of this script.
              version = try_constant('LIBXML_VERSION', 'libxml/xmlversion.h')
            end

            # Extra search locations tried after INCLUDEDIR / LIBDIR.
            myincl = %w[/usr/local/include /opt/local/include /usr/include]
            mylib = %w[/usr/local/lib /opt/local/lib /usr/lib]

            find_header('ruby.h', INCLUDEDIR, *myincl) or abort "need ruby.h"

            find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
            find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"

            find_header('parsley.h', INCLUDEDIR, *myincl) or abort "need parsley.h"
            find_library('parsley', 'parsley_compile', LIBDIR, *mylib) or abort "need libparsley"

            create_makefile('cparsley')
         
     | 
    
        data/lib/parsley.rb
    ADDED
    
    | 
         @@ -0,0 +1,84 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require File.dirname(__FILE__) + "/../ext/cparsley"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "rubygems"
         
     | 
| 
      
 3 
     | 
    
         
            +
            require "json"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "thread"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # Ruby front end for the native CParsley binding.
            #
            # A Parsley instance wraps one CParsley object built from a parslet (given
            # as a JSON string or as a Hash that is converted to JSON) and exposes
            # #parse to run it against a file/URL or an in-memory string.
            class Parsley

              # Guards CParsley.new. Created once when the class body is evaluated, so
              # every thread shares the same lock (a lazy `||=` inside #initialize
              # could hand different mutexes to threads racing on the first call).
              @@mutex = Mutex.new

              # Stores the user agent on the class and forwards its string form to
              # CParsley.set_user_agent.
              def self.user_agent=(agent)
                @user_agent = agent
                CParsley.set_user_agent(agent.to_s)
              end

              # Returns the value last assigned through Parsley.user_agent= (nil if none).
              def self.user_agent
                @user_agent
              end

              # parsley -- the parslet: a Hash (keys and leaf values are stringified,
              #            then the whole structure is serialized with #to_json) or any
              #            other object, which is handed to CParsley.new unchanged.
              # incl    -- passed through unchanged to CParsley.new.
              def initialize(parsley, incl = "")
                if parsley.is_a?(Hash)
                  parsley = recursive_stringify(parsley).to_json
                end
                # Construction is serialized across threads; presumably the native
                # compile step is not thread-safe -- confirm against ext/cparsley.c.
                @@mutex.synchronize do
                  @parsley = CParsley.new(parsley, incl)
                end
              end

              # Valid options:
              #
              # Requires one of:
              # :file -- the input file path or url
              # :string -- the input string
              #
              # And optionally (default is the first listed value):
              # :input => [:html, :xml]
              # :output => [:ruby, :json, :xml]
              # :prune => [true, false]
              # :sgwrap => [false, true]
              # :collate => [true, false]
              # :base => "http://some/base/href"
              # :allow_net => [true, false]
              # :allow_local => [true, false]
              #
              # Raises ParsleyError when neither :file nor :string is given.
              # Returns whatever the underlying CParsley#parse returns.
              #
              # The caller's hash is never modified: normalization happens on a copy.
              # (Normalizing in place turned a missing :file / :base into "", which is
              # truthy, so reusing the same hash for a second call wrongly reported
              # is_file and has_base as true.)
              def parse(options = {})
                options[:file] || options[:string] || (raise ParsleyError.new("must specify what to parse"))

                options = options.dup

                # Presence flags must be computed before the values are coerced to
                # strings below, because nil.to_s is "" and "" is truthy.
                options[:sgwrap] = !!options[:sgwrap]
                options[:is_file] = !!options[:file]
                options[:has_base] = !!options[:base]

                [:base, :file, :string].each do |key|
                  options[key] = options[key].to_s
                end

                options[:input]  ||= :html
                options[:output] ||= :ruby

                # These flags default to true when absent; an explicit nil/false is
                # respected and coerced to false.
                [:collate, :prune, :allow_net, :allow_local].each do |flag|
                  options[flag] = options.has_key?(flag) ? !!options[flag] : true
                end

                @parsley.parse(options)
              end

              private

              # Returns a copy of obj in which every Hash key and every leaf value has
              # been converted with #to_s; Hashes and Arrays are walked recursively.
              def recursive_stringify(obj)
                case obj
                when Hash
                  obj.inject({}) do |memo, (k, v)|
                    memo[k.to_s] = recursive_stringify(v)
                    memo
                  end
                when Array
                  obj.map { |e| recursive_stringify(e) }
                else
                  obj.to_s
                end
              end

            end
         
     | 
| 
         @@ -0,0 +1,58 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Generated by jeweler
            # DO NOT EDIT THIS FILE DIRECTLY
            # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
            # -*- encoding: utf-8 -*-
            #
            # NOTE: the RubyGems version lookup near the bottom was adjusted by hand;
            # regenerating this file with the gemspec command will overwrite that change.

            Gem::Specification.new do |s|
              s.name = %q{parsley-ruby}
              s.version = "0.4.3"

              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
              s.authors = ["Kyle Maxwell"]
              s.date = %q{2009-12-28}
              s.description = %q{XML/HTML Parser}
              s.email = %q{kyle@kylemaxwell.com}
              s.extensions = ["ext/extconf.rb"]
              s.extra_rdoc_files = [
                "README"
              ]
              s.files = [
                ".gitignore",
                 "CHANGELOG",
                 "README",
                 "Rakefile",
                 "VERSION",
                 "ext/cparsley.c",
                 "ext/extconf.rb",
                 "lib/parsley.rb",
                 "parsley-ruby.gemspec",
                 "test/test_parsley.rb",
                 "test/yelp-benchmark.rb",
                 "test/yelp-home.html",
                 "test/yelp-home.let",
                 "test/yelp.html"
              ]
              s.homepage = %q{http://github.com/fizx/parsley-ruby}
              s.rdoc_options = ["--charset=UTF-8"]
              s.require_paths = ["lib", "ext"]
              s.rubygems_version = %q{1.3.5}
              s.summary = %q{Ruby binding for parsley}
              s.test_files = [
                "test/test_parsley.rb",
                 "test/yelp-benchmark.rb"
              ]

              if s.respond_to? :specification_version then
                s.specification_version = 3

                # Gem::RubyGemsVersion is the legacy constant and is not defined by
                # newer RubyGems; use Gem::VERSION when it exists and fall back to the
                # old name otherwise.
                installed_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

                # Use add_runtime_dependency when the installed RubyGems is at least 1.2.0.
                if Gem::Version.new(installed_rubygems) >= Gem::Version.new('1.2.0') then
                  s.add_runtime_dependency(%q<json>, ["> 0.0.0"])
                else
                  s.add_dependency(%q<json>, ["> 0.0.0"])
                end
              else
                s.add_dependency(%q<json>, ["> 0.0.0"])
              end
            end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     |