scrappy 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest +2 -0
- data/lib/scrappy.rb +1 -1
- data/public/javascripts/utils.js +10 -0
- data/scrappy.gemspec +3 -3
- data/views/test.haml +73 -0
- metadata +5 -3
    
        data/History.txt
    CHANGED
    
    
    
        data/Manifest
    CHANGED
    
    | @@ -37,6 +37,7 @@ public/images/logo.png | |
| 37 37 | 
             
            public/images/logo_tiny.png
         | 
| 38 38 | 
             
            public/javascripts/annotator.js
         | 
| 39 39 | 
             
            public/javascripts/remote.js
         | 
| 40 | 
            +
            public/javascripts/utils.js
         | 
| 40 41 | 
             
            public/stylesheets/application.css
         | 
| 41 42 | 
             
            test/test_helper.rb
         | 
| 42 43 | 
             
            test/test_scrappy.rb
         | 
| @@ -46,3 +47,4 @@ views/home.haml | |
| 46 47 | 
             
            views/layout.haml
         | 
| 47 48 | 
             
            views/patterns.haml
         | 
| 48 49 | 
             
            views/samples.haml
         | 
| 50 | 
            +
            views/test.haml
         | 
    
        data/lib/scrappy.rb
    CHANGED
    
    
        data/public/javascripts/utils.js
    ADDED
    
| @@ -0,0 +1,10 @@ | |
| 1 | 
            +
            jQuery(function ($) {
         | 
| 2 | 
            +
              $('.checkall').click(function () {
         | 
| 3 | 
            +
              	$(this).parents('form').find(':checkbox').attr('checked', this.checked);
         | 
| 4 | 
            +
              });
         | 
| 5 | 
            +
             | 
| 6 | 
            +
              $('.checksend').live('click', function (e){
         | 
| 7 | 
            +
                $("form").attr("action",$(this).attr("href")).submit();
         | 
| 8 | 
            +
                return false;
         | 
| 9 | 
            +
              });
         | 
| 10 | 
            +
            });
         | 
    
        data/scrappy.gemspec
    CHANGED
    
    | @@ -2,17 +2,17 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |s|
         | 
| 4 4 | 
             
              s.name = %q{scrappy}
         | 
| 5 | 
            -
              s.version = "0.4.2"
         | 
| 5 | 
            +
              s.version = "0.4.3"
         | 
| 6 6 |  | 
| 7 7 | 
             
              s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
         | 
| 8 8 | 
             
              s.authors = ["Jose Ignacio"]
         | 
| 9 | 
            -
              s.date = %q{2011-07- | 
| 9 | 
            +
              s.date = %q{2011-07-11}
         | 
| 10 10 | 
             
              s.default_executable = %q{scrappy}
         | 
| 11 11 | 
             
              s.description = %q{RDF web scraper}
         | 
| 12 12 | 
             
              s.email = %q{joseignacio.fernandez@gmail.com}
         | 
| 13 13 | 
             
              s.executables = ["scrappy"]
         | 
| 14 14 | 
             
              s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb"]
         | 
| 15 | 
            -
              s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/annotator.js", "public/javascripts/remote.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/extractors.haml", "views/help.haml", "views/home.haml", "views/layout.haml", "views/patterns.haml", "views/samples.haml", "scrappy.gemspec"]
         | 
| 15 | 
            +
              s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/annotator.js", "public/javascripts/remote.js", "public/javascripts/utils.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/extractors.haml", "views/help.haml", "views/home.haml", "views/layout.haml", "views/patterns.haml", "views/samples.haml", "views/test.haml", "scrappy.gemspec"]
         | 
| 16 16 | 
             
              s.homepage = %q{http://github.com/josei/scrappy}
         | 
| 17 17 | 
             
              s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
         | 
| 18 18 | 
             
              s.require_paths = ["lib"]
         | 
    
        data/views/test.haml
    ADDED
    
    | @@ -0,0 +1,73 @@ | |
| 1 | 
            +
            #body
         | 
| 2 | 
            +
              %h1 Testing extraction
         | 
| 3 | 
            +
              %h2 Results
         | 
| 4 | 
            +
              %p
         | 
| 5 | 
            +
                -(@results.keys-[:total]).each do |label|
         | 
| 6 | 
            +
                  ==#{label}:
         | 
| 7 | 
            +
                  %ul.detail
         | 
| 8 | 
            +
                    %li
         | 
| 9 | 
            +
                      %span.name
         | 
| 10 | 
            +
                        Precision
         | 
| 11 | 
            +
                      %span.date
         | 
| 12 | 
            +
                        =percentage(@results[label][:precision])
         | 
| 13 | 
            +
                    %li
         | 
| 14 | 
            +
                      %span.name
         | 
| 15 | 
            +
                        Recall
         | 
| 16 | 
            +
                      %span.date
         | 
| 17 | 
            +
                        =percentage(@results[label][:recall])
         | 
| 18 | 
            +
                    %li
         | 
| 19 | 
            +
                      %span.name
         | 
| 20 | 
            +
                        F-score
         | 
| 21 | 
            +
                      %span.date
         | 
| 22 | 
            +
                        =percentage(@results[label][:fscore])
         | 
| 23 | 
            +
                Total:
         | 
| 24 | 
            +
                %ul.detail
         | 
| 25 | 
            +
                  %li
         | 
| 26 | 
            +
                    %span.name
         | 
| 27 | 
            +
                      Precision
         | 
| 28 | 
            +
                    %span.date
         | 
| 29 | 
            +
                      =percentage(@results[:total][:precision])
         | 
| 30 | 
            +
                  %li
         | 
| 31 | 
            +
                    %span.name
         | 
| 32 | 
            +
                      Recall
         | 
| 33 | 
            +
                    %span.date
         | 
| 34 | 
            +
                      =percentage(@results[:total][:recall])
         | 
| 35 | 
            +
                  %li
         | 
| 36 | 
            +
                    %span.name
         | 
| 37 | 
            +
                      F-score
         | 
| 38 | 
            +
                    %span.date
         | 
| 39 | 
            +
                      =percentage(@results[:total][:fscore])
         | 
| 40 | 
            +
             | 
| 41 | 
            +
              %h2 Details
         | 
| 42 | 
            +
              %p
         | 
| 43 | 
            +
                %ul.detail
         | 
| 44 | 
            +
                  %li
         | 
| 45 | 
            +
                    %span.name
         | 
| 46 | 
            +
                      Triples
         | 
| 47 | 
            +
                    %span.date
         | 
| 48 | 
            +
                      =@total
         | 
| 49 | 
            +
                  %li
         | 
| 50 | 
            +
                    %span.name
         | 
| 51 | 
            +
                      Extracted triples
         | 
| 52 | 
            +
                    %span.date
         | 
| 53 | 
            +
                      =@extracted
         | 
| 54 | 
            +
                  %li
         | 
| 55 | 
            +
                    %span.name
         | 
| 56 | 
            +
                      Correct triples
         | 
| 57 | 
            +
                    %span.date
         | 
| 58 | 
            +
                      =@correct
         | 
| 59 | 
            +
                Wrong triples:
         | 
| 60 | 
            +
                %pre.wide=escape_html @wrong.to_ntriples
         | 
| 61 | 
            +
                Missing triples:
         | 
| 62 | 
            +
                %pre.wide=escape_html @missing.to_ntriples
         | 
| 63 | 
            +
              %h2
         | 
| 64 | 
            +
                Retry?
         | 
| 65 | 
            +
              %form{:method=>:post}
         | 
| 66 | 
            +
                %p
         | 
| 67 | 
            +
                  Add here any triples that should be counted as correct ones:
         | 
| 68 | 
            +
                %p
         | 
| 69 | 
            +
                  %textarea{:name=>:output, :rows=>20, :wrap=>:off}=params["output"]
         | 
| 70 | 
            +
                  -(params[:samples] || []).each do |sample|
         | 
| 71 | 
            +
                    %input{:name=>"samples[]", :type=>:hidden, :value=>sample}
         | 
| 72 | 
            +
                %p
         | 
| 73 | 
            +
                  %button Retry
         | 
    
        metadata
    CHANGED
    
    | @@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version | |
| 5 5 | 
             
              segments: 
         | 
| 6 6 | 
             
              - 0
         | 
| 7 7 | 
             
              - 4
         | 
| 8 | 
            -
              - 2
         | 
| 9 | 
            -
              version: 0.4.2
         | 
| 8 | 
            +
              - 3
         | 
| 9 | 
            +
              version: 0.4.3
         | 
| 10 10 | 
             
            platform: ruby
         | 
| 11 11 | 
             
            authors: 
         | 
| 12 12 | 
             
            - Jose Ignacio
         | 
| @@ -14,7 +14,7 @@ autorequire: | |
| 14 14 | 
             
            bindir: bin
         | 
| 15 15 | 
             
            cert_chain: []
         | 
| 16 16 |  | 
| 17 | 
            -
            date: 2011-07- | 
| 17 | 
            +
            date: 2011-07-11 00:00:00 +02:00
         | 
| 18 18 | 
             
            default_executable: 
         | 
| 19 19 | 
             
            dependencies: 
         | 
| 20 20 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| @@ -235,6 +235,7 @@ files: | |
| 235 235 | 
             
            - public/images/logo_tiny.png
         | 
| 236 236 | 
             
            - public/javascripts/annotator.js
         | 
| 237 237 | 
             
            - public/javascripts/remote.js
         | 
| 238 | 
            +
            - public/javascripts/utils.js
         | 
| 238 239 | 
             
            - public/stylesheets/application.css
         | 
| 239 240 | 
             
            - test/test_helper.rb
         | 
| 240 241 | 
             
            - test/test_scrappy.rb
         | 
| @@ -244,6 +245,7 @@ files: | |
| 244 245 | 
             
            - views/layout.haml
         | 
| 245 246 | 
             
            - views/patterns.haml
         | 
| 246 247 | 
             
            - views/samples.haml
         | 
| 248 | 
            +
            - views/test.haml
         | 
| 247 249 | 
             
            - scrappy.gemspec
         | 
| 248 250 | 
             
            has_rdoc: true
         | 
| 249 251 | 
             
            homepage: http://github.com/josei/scrappy
         |