sinew 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/LICENSE +1 -1
 - data/README.md +27 -18
 - data/bin/sinew +6 -4
 - data/lib/sinew/dsl.rb +21 -8
 - data/lib/sinew/main.rb +6 -0
 - data/lib/sinew/output.rb +20 -4
 - data/lib/sinew/request.rb +4 -1
 - data/lib/sinew/response.rb +30 -11
 - data/lib/sinew/runtime_options.rb +2 -0
 - data/lib/sinew/version.rb +1 -1
 - data/test/recipes/array_header.sinew +6 -0
 - data/test/recipes/basic.sinew +8 -0
 - data/test/recipes/dups.sinew +7 -0
 - data/test/recipes/implicit_header.sinew +5 -0
 - data/test/recipes/limit.sinew +11 -0
 - data/test/recipes/noko.sinew +9 -0
 - data/test/recipes/uri.sinew +11 -0
 - data/test/recipes/xml.sinew +8 -0
 - data/test/test_helper.rb +22 -12
 - data/test/test_legacy.rb +4 -2
 - data/test/test_main.rb +8 -20
 - data/test/test_output.rb +2 -19
 - data/test/test_recipes.rb +60 -0
 - metadata +20 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 33506a03f47a88cae5bf7e0f4675d7cf83d86ba3c96f0880f5c473a7b23b167b
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 990bd4690f9fe799774c349314a32ab2c08979d555f03891316c5e0be8a4ad3d
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 9644097a2e11d8cba59a7985dfe770f27b00d5d18b676d0cacdee3e73a21f1b6c237b3bb58d68489d2a67fc981f7a7f8bb27a6e6fb23781f318cde78b392d7cd
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 667c301e7896b27162a77cff5165f264a0c2b73afbe5c35f541181709118185a13241d187ff8f8d5964e302537064ff927d9fa64ece2cb10ca65ba7dd89ce807
         
     | 
    
        data/LICENSE
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | 
         @@ -1,11 +1,13 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            ## Welcome to Sinew
         
     | 
| 
       2 
4 
     | 
    
         | 
| 
       3 
5 
     | 
    
         
             
            Sinew collects structured data from web sites (screen scraping). It provides a Ruby DSL built for crawling, a robust caching system, and integration with [Nokogiri](http://nokogiri.org). Though small, this project is the culmination of years of effort based on crawling systems built at several different companies.
         
     | 
| 
       4 
6 
     | 
    
         | 
| 
       5 
7 
     | 
    
         
             
            Sinew is distributed as a ruby gem:
         
     | 
| 
       6 
8 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
            ``` 
     | 
| 
       8 
     | 
    
         
            -
            gem install sinew
         
     | 
| 
      
 9 
     | 
    
         
            +
            ```sh
         
     | 
| 
      
 10 
     | 
    
         
            +
            $ gem install sinew
         
     | 
| 
       9 
11 
     | 
    
         
             
            ```
         
     | 
| 
       10 
12 
     | 
    
         | 
| 
       11 
13 
     | 
    
         
             
            or in your Gemfile:
         
     | 
| 
         @@ -16,17 +18,16 @@ gem 'sinew' 
     | 
|
| 
       16 
18 
     | 
    
         | 
| 
       17 
19 
     | 
    
         
             
            ## Table of Contents
         
     | 
| 
       18 
20 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
            <!---
         
     | 
| 
       20 
     | 
    
         
            -
            markdown-toc --no-firsth1 --maxdepth 1 readme.md
         
     | 
| 
       21 
     | 
    
         
            -
            -->
         
     | 
| 
      
 21 
     | 
    
         
            +
            <!--- markdown-toc --no-firsth1 --maxdepth 1 readme.md -->
         
     | 
| 
       22 
22 
     | 
    
         | 
| 
       23 
     | 
    
         
            -
            * [Sinew 2 
     | 
| 
      
 23 
     | 
    
         
            +
            * [Sinew 2](#sinew-2-may-2018)
         
     | 
| 
       24 
24 
     | 
    
         
             
            * [Quick Example](#quick-example)
         
     | 
| 
       25 
25 
     | 
    
         
             
            * [How it Works](#how-it-works)
         
     | 
| 
       26 
26 
     | 
    
         
             
            * [DSL Reference](#dsl-reference)
         
     | 
| 
       27 
27 
     | 
    
         
             
            * [Hints](#hints)
         
     | 
| 
       28 
28 
     | 
    
         
             
            * [Limitations](#limitations)
         
     | 
| 
       29 
29 
     | 
    
         
             
            * [Changelog](#changelog)
         
     | 
| 
      
 30 
     | 
    
         
            +
            * [License](#license)
         
     | 
| 
       30 
31 
     | 
    
         | 
| 
       31 
32 
     | 
    
         
             
            ## Sinew 2 (May 2018)
         
     | 
| 
       32 
33 
     | 
    
         | 
| 
         @@ -34,7 +35,7 @@ I am pleased to announce the release of Sinew 2.0, a complete rewrite of Sinew f 
     | 
|
| 
       34 
35 
     | 
    
         | 
| 
       35 
36 
     | 
    
         
             
            * Remove dependencies on active_support, curl and tidy. We use HTTParty now.
         
     | 
| 
       36 
37 
     | 
    
         
             
            * Much easier to customize requests in `.sinew` files. For example, setting User-Agent or Bearer tokens.
         
     | 
| 
       37 
     | 
    
         
            -
            * More operations like `post_json` or the generic `http`. These methods are  
     | 
| 
      
 38 
     | 
    
         
            +
            * More operations like `post_json` or the generic `http`. These methods are thin wrappers around HTTParty.
         
     | 
| 
       38 
39 
     | 
    
         
             
            * New end-of-run report.
         
     | 
| 
       39 
40 
     | 
    
         
             
            * Tests, rubocop, vscode settings, travis, etc.
         
     | 
| 
       40 
41 
     | 
    
         | 
| 
         @@ -124,15 +125,18 @@ Because all requests are cached, you can run Sinew repeatedly with confidence. R 
     | 
|
| 
       124 
125 
     | 
    
         
             
            #### Making requests
         
     | 
| 
       125 
126 
     | 
    
         | 
| 
       126 
127 
     | 
    
         
             
            * `get(url, query = {})` - fetch a url with HTTP GET. URL parameters can be added using `query`.
         
     | 
| 
       127 
     | 
    
         
            -
            * `post(url, form = {})` - fetch a url with HTTP POST, using `form` as the POST body.
         
     | 
| 
      
 128 
     | 
    
         
            +
            * `post(url, form = {})` - fetch a url with HTTP POST, using `form` as the URL encoded POST body.
         
     | 
| 
       128 
129 
     | 
    
         
             
            * `post_json(url, json = {})` - fetch a url with HTTP POST, using `json` as the POST body.
         
     | 
| 
       129 
130 
     | 
    
         
             
            * `http(method, url, options = {})` - use this for more complex requests
         
     | 
| 
       130 
131 
     | 
    
         | 
| 
       131 
132 
     | 
    
         
             
            #### Parsing the response
         
     | 
| 
       132 
133 
     | 
    
         | 
| 
      
 134 
     | 
    
         
            +
            These variables are set after each HTTP request.
         
     | 
| 
      
 135 
     | 
    
         
            +
             
     | 
| 
       133 
136 
     | 
    
         
             
            * `raw` - the raw response from the last request
         
     | 
| 
       134 
137 
     | 
    
         
             
            * `html` - like `raw`, but with a handful of HTML-specific whitespace cleanups
         
     | 
| 
       135 
     | 
    
         
            -
            * `noko` - a [Nokogiri](http://nokogiri.org) document 
     | 
| 
      
 138 
     | 
    
         
            +
            * `noko` - parse the response as HTML and return a [Nokogiri](http://nokogiri.org) document
         
     | 
| 
      
 139 
     | 
    
         
            +
            * `xml` - parse the response as XML and return a [Nokogiri](http://nokogiri.org) document
         
     | 
| 
       136 
140 
     | 
    
         
             
            * `json` - parse the response as JSON, with symbolized keys
         
     | 
| 
       137 
141 
     | 
    
         
             
            * `url` - the url of the last request. If the request goes through a redirect, `url` will reflect the final url.
         
     | 
| 
       138 
142 
     | 
    
         
             
            * `uri` - the URI of the last request. This is useful for resolving relative URLs.
         
     | 
| 
         @@ -169,19 +173,24 @@ noko.css("table")[4].css("td").select { |i| i[:width].to_i > 80 }.map(&:text) 
     | 
|
| 
       169 
173 
     | 
    
         | 
| 
       170 
174 
     | 
    
         
             
            ## Changelog
         
     | 
| 
       171 
175 
     | 
    
         | 
| 
       172 
     | 
    
         
            -
            #### 2.0. 
     | 
| 
      
 176 
     | 
    
         
            +
            #### 2.0.2 (May 2018)
         
     | 
| 
       173 
177 
     | 
    
         | 
| 
       174 
     | 
    
         
            -
            *  
     | 
| 
      
 178 
     | 
    
         
            +
            * Support for `--limit`, `--proxy` and the `xml` variable
         
     | 
| 
      
 179 
     | 
    
         
            +
            * Dedup - warn and ignore if row[:url] has already been emitted
         
     | 
| 
      
 180 
     | 
    
         
            +
            * Auto gunzip if contents are compressed
         
     | 
| 
       175 
181 
     | 
    
         | 
| 
       176 
     | 
    
         
            -
            ####  
     | 
| 
      
 182 
     | 
    
         
            +
            #### 2.0.1 (May 2018)
         
     | 
| 
       177 
183 
     | 
    
         | 
| 
       178 
     | 
    
         
            -
            *  
     | 
| 
      
 184 
     | 
    
         
            +
            * Support for legacy cached `head` files from Sinew 1
         
     | 
| 
      
 185 
     | 
    
         
            +
             
     | 
| 
      
 186 
     | 
    
         
            +
            #### 2.0.0 (May 2018)
         
     | 
| 
      
 187 
     | 
    
         
            +
             
     | 
| 
      
 188 
     | 
    
         
            +
            * Complete rewrite. See above.
         
     | 
| 
       179 
189 
     | 
    
         | 
| 
       180 
     | 
    
         
            -
            #### 1.0. 
     | 
| 
      
 190 
     | 
    
         
            +
            #### 1.0.3 (June 2012)
         
     | 
| 
       181 
191 
     | 
    
         | 
| 
       182 
     | 
    
         
            -
             
     | 
| 
      
 192 
     | 
    
         
            +
            ...
         
     | 
| 
       183 
193 
     | 
    
         | 
| 
       184 
     | 
    
         
            -
             
     | 
| 
      
 194 
     | 
    
         
            +
            ## License
         
     | 
| 
       185 
195 
     | 
    
         | 
| 
       186 
     | 
    
         
            -
             
     | 
| 
       187 
     | 
    
         
            -
            * Added first batch of unit tests
         
     | 
| 
      
 196 
     | 
    
         
            +
            This project is [licensed under the MIT License](LICENSE).
         
     | 
    
        data/bin/sinew
    CHANGED
    
    | 
         @@ -11,11 +11,13 @@ require 'slop' 
     | 
|
| 
       11 
11 
     | 
    
         | 
| 
       12 
12 
     | 
    
         
             
            options = Slop.parse do |o|
         
     | 
| 
       13 
13 
     | 
    
         
             
              o.banner = 'Usage: sinew [options] <gub.sinew>'
         
     | 
| 
       14 
     | 
    
         
            -
              o.bool '-v', '--verbose', 'dump  
     | 
| 
       15 
     | 
    
         
            -
              o.bool '--version', 'show version'
         
     | 
| 
      
 14 
     | 
    
         
            +
              o.bool '-v', '--verbose', 'dump emitted rows while running'
         
     | 
| 
       16 
15 
     | 
    
         
             
              o.bool '-q', '--quiet', 'suppress some output'
         
     | 
| 
       17 
     | 
    
         
            -
              o. 
     | 
| 
       18 
     | 
    
         
            -
              o. 
     | 
| 
      
 16 
     | 
    
         
            +
              o.integer '-l', '--limit', 'quit after emitting this many rows'
         
     | 
| 
      
 17 
     | 
    
         
            +
              o.string '-c', '--cache', 'set custom cache directory', default: "#{ENV['HOME']}/.sinew"
         
     | 
| 
      
 18 
     | 
    
         
            +
              o.string '--proxy', 'use host[:port] as HTTP proxy'
         
     | 
| 
      
 19 
     | 
    
         
            +
              o.bool '--version', 'show version and exit'
         
     | 
| 
      
 20 
     | 
    
         
            +
              o.on('--help', 'show this help') do
         
     | 
| 
       19 
21 
     | 
    
         
             
                puts o
         
     | 
| 
       20 
22 
     | 
    
         
             
                exit
         
     | 
| 
       21 
23 
     | 
    
         
             
              end
         
     | 
    
        data/lib/sinew/dsl.rb
    CHANGED
    
    | 
         @@ -7,6 +7,9 @@ require 'cgi' 
     | 
|
| 
       7 
7 
     | 
    
         | 
| 
       8 
8 
     | 
    
         
             
            module Sinew
         
     | 
| 
       9 
9 
     | 
    
         
             
              class DSL
         
     | 
| 
      
 10 
     | 
    
         
            +
                # this is used to break out of --limit
         
     | 
| 
      
 11 
     | 
    
         
            +
                class LimitError < StandardError; end
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
       10 
13 
     | 
    
         
             
                attr_reader :sinew, :raw, :uri, :elapsed
         
     | 
| 
       11 
14 
     | 
    
         | 
| 
       12 
15 
     | 
    
         
             
                def initialize(sinew)
         
     | 
| 
         @@ -15,8 +18,12 @@ module Sinew 
     | 
|
| 
       15 
18 
     | 
    
         | 
| 
       16 
19 
     | 
    
         
             
                def run
         
     | 
| 
       17 
20 
     | 
    
         
             
                  tm = Time.now
         
     | 
| 
       18 
     | 
    
         
            -
                   
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
      
 21 
     | 
    
         
            +
                  begin
         
     | 
| 
      
 22 
     | 
    
         
            +
                    recipe = sinew.options[:recipe]
         
     | 
| 
      
 23 
     | 
    
         
            +
                    instance_eval(File.read(recipe, mode: 'rb'), recipe)
         
     | 
| 
      
 24 
     | 
    
         
            +
                  rescue LimitError
         
     | 
| 
      
 25 
     | 
    
         
            +
                    # ignore - this is flow control for --limit
         
     | 
| 
      
 26 
     | 
    
         
            +
                  end
         
     | 
| 
       20 
27 
     | 
    
         
             
                  @elapsed = Time.now - tm
         
     | 
| 
       21 
28 
     | 
    
         
             
                end
         
     | 
| 
       22 
29 
     | 
    
         | 
| 
         @@ -46,14 +53,13 @@ module Sinew 
     | 
|
| 
       46 
53 
     | 
    
         | 
| 
       47 
54 
     | 
    
         
             
                def http(method, url, options = {})
         
     | 
| 
       48 
55 
     | 
    
         
             
                  # reset
         
     | 
| 
       49 
     | 
    
         
            -
                   
     | 
| 
      
 56 
     | 
    
         
            +
                  instance_variables.each do |i|
         
     | 
| 
      
 57 
     | 
    
         
            +
                    instance_variable_set(i, nil) if i != :@sinew
         
     | 
| 
      
 58 
     | 
    
         
            +
                  end
         
     | 
| 
       50 
59 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
                  # fetch
         
     | 
| 
      
 60 
     | 
    
         
            +
                  # fetch and make response available to callers
         
     | 
| 
       52 
61 
     | 
    
         
             
                  response = sinew.http(method, url, options)
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
                  # respond
         
     | 
| 
       55 
     | 
    
         
            -
                  @uri = response.uri
         
     | 
| 
       56 
     | 
    
         
            -
                  @raw = response.body
         
     | 
| 
      
 62 
     | 
    
         
            +
                  @uri, @raw = response.uri, response.body
         
     | 
| 
       57 
63 
     | 
    
         
             
                end
         
     | 
| 
       58 
64 
     | 
    
         | 
| 
       59 
65 
     | 
    
         
             
                #
         
     | 
| 
         @@ -75,6 +81,10 @@ module Sinew 
     | 
|
| 
       75 
81 
     | 
    
         
             
                  @noko ||= Nokogiri::HTML(html)
         
     | 
| 
       76 
82 
     | 
    
         
             
                end
         
     | 
| 
       77 
83 
     | 
    
         | 
| 
      
 84 
     | 
    
         
            +
                def xml
         
     | 
| 
      
 85 
     | 
    
         
            +
                  @xml ||= Nokogiri::XML(html)
         
     | 
| 
      
 86 
     | 
    
         
            +
                end
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
       78 
88 
     | 
    
         
             
                def json
         
     | 
| 
       79 
89 
     | 
    
         
             
                  @json ||= JSON.parse(raw, symbolize_names: true)
         
     | 
| 
       80 
90 
     | 
    
         
             
                end
         
     | 
| 
         @@ -93,6 +103,9 @@ module Sinew 
     | 
|
| 
       93 
103 
     | 
    
         | 
| 
       94 
104 
     | 
    
         
             
                def csv_emit(row)
         
     | 
| 
       95 
105 
     | 
    
         
             
                  sinew.output.emit(row)
         
     | 
| 
      
 106 
     | 
    
         
            +
                  if sinew.output.count == sinew.options[:limit]
         
     | 
| 
      
 107 
     | 
    
         
            +
                    raise LimitError.new
         
     | 
| 
      
 108 
     | 
    
         
            +
                  end
         
     | 
| 
       96 
109 
     | 
    
         
             
                end
         
     | 
| 
       97 
110 
     | 
    
         
             
              end
         
     | 
| 
       98 
111 
     | 
    
         
             
            end
         
     | 
    
        data/lib/sinew/main.rb
    CHANGED
    
    | 
         @@ -15,6 +15,12 @@ module Sinew 
     | 
|
| 
       15 
15 
     | 
    
         
             
                  @runtime_options = RuntimeOptions.new
         
     | 
| 
       16 
16 
     | 
    
         
             
                  @request_tm = Time.at(0)
         
     | 
| 
       17 
17 
     | 
    
         
             
                  @request_count = 0
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                  if options[:proxy]
         
     | 
| 
      
 20 
     | 
    
         
            +
                    addr, port = options[:proxy].split(':')
         
     | 
| 
      
 21 
     | 
    
         
            +
                    runtime_options.httparty_options[:http_proxyaddr] = addr
         
     | 
| 
      
 22 
     | 
    
         
            +
                    runtime_options.httparty_options[:http_proxyport] = port || 80
         
     | 
| 
      
 23 
     | 
    
         
            +
                  end
         
     | 
| 
       18 
24 
     | 
    
         
             
                end
         
     | 
| 
       19 
25 
     | 
    
         | 
| 
       20 
26 
     | 
    
         
             
                def run
         
     | 
    
        data/lib/sinew/output.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'csv'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'set'
         
     | 
| 
       2 
3 
     | 
    
         
             
            require 'stringex'
         
     | 
| 
       3 
4 
     | 
    
         | 
| 
       4 
5 
     | 
    
         
             
            #
         
     | 
| 
         @@ -7,11 +8,12 @@ require 'stringex' 
     | 
|
| 
       7 
8 
     | 
    
         | 
| 
       8 
9 
     | 
    
         
             
            module Sinew
         
     | 
| 
       9 
10 
     | 
    
         
             
              class Output
         
     | 
| 
       10 
     | 
    
         
            -
                attr_reader :sinew, :columns, :rows, :csv
         
     | 
| 
      
 11 
     | 
    
         
            +
                attr_reader :sinew, :columns, :rows, :urls, :csv
         
     | 
| 
       11 
12 
     | 
    
         | 
| 
       12 
13 
     | 
    
         
             
                def initialize(sinew)
         
     | 
| 
       13 
14 
     | 
    
         
             
                  @sinew = sinew
         
     | 
| 
       14 
15 
     | 
    
         
             
                  @rows = []
         
     | 
| 
      
 16 
     | 
    
         
            +
                  @urls = Set.new
         
     | 
| 
       15 
17 
     | 
    
         
             
                end
         
     | 
| 
       16 
18 
     | 
    
         | 
| 
       17 
19 
     | 
    
         
             
                def filename
         
     | 
| 
         @@ -41,6 +43,8 @@ module Sinew 
     | 
|
| 
       41 
43 
     | 
    
         
             
                  # implicit header if necessary
         
     | 
| 
       42 
44 
     | 
    
         
             
                  header(row.keys) if !csv
         
     | 
| 
       43 
45 
     | 
    
         | 
| 
      
 46 
     | 
    
         
            +
                  # don't allow duplicate urls
         
     | 
| 
      
 47 
     | 
    
         
            +
                  return if dup_url?(row)
         
     | 
| 
       44 
48 
     | 
    
         
             
                  rows << row.dup
         
     | 
| 
       45 
49 
     | 
    
         | 
| 
       46 
50 
     | 
    
         
             
                  # map columns to row, and normalize along the way
         
     | 
| 
         @@ -94,6 +98,9 @@ module Sinew 
     | 
|
| 
       94 
98 
     | 
    
         
             
                    s.to_s
         
     | 
| 
       95 
99 
     | 
    
         
             
                  end
         
     | 
| 
       96 
100 
     | 
    
         | 
| 
      
 101 
     | 
    
         
            +
                  # strip html tags. Note that we replace tags with spaces
         
     | 
| 
      
 102 
     | 
    
         
            +
                  s = s.gsub(/<[^>]+>/, ' ')
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
       97 
104 
     | 
    
         
             
                  #
         
     | 
| 
       98 
105 
     | 
    
         
             
                  # Below uses stringex
         
     | 
| 
       99 
106 
     | 
    
         
             
                  #
         
     | 
| 
         @@ -101,9 +108,6 @@ module Sinew 
     | 
|
| 
       101 
108 
     | 
    
         
             
                  # github.com/rsl/stringex/blob/master/lib/stringex/localization/conversion_expressions.rb
         
     | 
| 
       102 
109 
     | 
    
         
             
                  #
         
     | 
| 
       103 
110 
     | 
    
         | 
| 
       104 
     | 
    
         
            -
                  # <a>b</a> => b
         
     | 
| 
       105 
     | 
    
         
            -
                  s = s.strip_html_tags
         
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
       107 
111 
     | 
    
         
             
                  # Converts MS Word 'smart punctuation' to ASCII
         
     | 
| 
       108 
112 
     | 
    
         
             
                  s = s.convert_smart_punctuation
         
     | 
| 
       109 
113 
     | 
    
         | 
| 
         @@ -122,5 +126,17 @@ module Sinew 
     | 
|
| 
       122 
126 
     | 
    
         
             
                  s
         
     | 
| 
       123 
127 
     | 
    
         
             
                end
         
     | 
| 
       124 
128 
     | 
    
         
             
                protected :normalize
         
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
      
 130 
     | 
    
         
            +
                def dup_url?(row)
         
     | 
| 
      
 131 
     | 
    
         
            +
                  if url = row[:url]
         
     | 
| 
      
 132 
     | 
    
         
            +
                    if urls.include?(url)
         
     | 
| 
      
 133 
     | 
    
         
            +
                      sinew.warning("duplicate url: #{url}") if !sinew.quiet?
         
     | 
| 
      
 134 
     | 
    
         
            +
                      return true
         
     | 
| 
      
 135 
     | 
    
         
            +
                    end
         
     | 
| 
      
 136 
     | 
    
         
            +
                    urls << url
         
     | 
| 
      
 137 
     | 
    
         
            +
                  end
         
     | 
| 
      
 138 
     | 
    
         
            +
                  false
         
     | 
| 
      
 139 
     | 
    
         
            +
                end
         
     | 
| 
      
 140 
     | 
    
         
            +
                protected :dup_url?
         
     | 
| 
       125 
141 
     | 
    
         
             
              end
         
     | 
| 
       126 
142 
     | 
    
         
             
            end
         
     | 
    
        data/lib/sinew/request.rb
    CHANGED
    
    | 
         @@ -28,7 +28,10 @@ module Sinew 
     | 
|
| 
       28 
28 
     | 
    
         
             
                def perform
         
     | 
| 
       29 
29 
     | 
    
         
             
                  validate!
         
     | 
| 
       30 
30 
     | 
    
         | 
| 
       31 
     | 
    
         
            -
                  # merge  
     | 
| 
      
 31 
     | 
    
         
            +
            # merge options
         
     | 
| 
      
 32 
     | 
    
         
            +
                  options = self.options.merge(sinew.runtime_options.httparty_options)
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                  # merge headers
         
     | 
| 
       32 
35 
     | 
    
         
             
                  headers = sinew.runtime_options.headers
         
     | 
| 
       33 
36 
     | 
    
         
             
                  headers = headers.merge(options[:headers]) if options[:headers]
         
     | 
| 
       34 
37 
     | 
    
         
             
                  options[:headers] = headers
         
     | 
    
        data/lib/sinew/response.rb
    CHANGED
    
    | 
         @@ -1,3 +1,6 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'stringio'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'zlib'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
       1 
4 
     | 
    
         
             
            #
         
     | 
| 
       2 
5 
     | 
    
         
             
            # An HTTP response. Mostly a wrapper around HTTParty.
         
     | 
| 
       3 
6 
     | 
    
         
             
            #
         
     | 
| 
         @@ -16,13 +19,7 @@ module Sinew 
     | 
|
| 
       16 
19 
     | 
    
         
             
                    response.uri = party_response.request.last_uri
         
     | 
| 
       17 
20 
     | 
    
         
             
                    response.code = party_response.code
         
     | 
| 
       18 
21 
     | 
    
         
             
                    response.headers = party_response.headers.to_h
         
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
                    # force to utf-8 as best we can
         
     | 
| 
       21 
     | 
    
         
            -
                    body = party_response.body
         
     | 
| 
       22 
     | 
    
         
            -
                    if body.encoding != Encoding::UTF_8
         
     | 
| 
       23 
     | 
    
         
            -
                      body = body.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
         
     | 
| 
       24 
     | 
    
         
            -
                    end
         
     | 
| 
       25 
     | 
    
         
            -
                    response.body = body
         
     | 
| 
      
 22 
     | 
    
         
            +
                    response.body = process_body(party_response)
         
     | 
| 
       26 
23 
     | 
    
         
             
                  end
         
     | 
| 
       27 
24 
     | 
    
         
             
                end
         
     | 
| 
       28 
25 
     | 
    
         | 
| 
         @@ -60,21 +57,43 @@ module Sinew 
     | 
|
| 
       60 
57 
     | 
    
         
             
                end
         
     | 
| 
       61 
58 
     | 
    
         | 
| 
       62 
59 
     | 
    
         
             
                def self.from_legacy_head(response, head)
         
     | 
| 
       63 
     | 
    
         
            -
                  response.tap do | 
     | 
| 
      
 60 
     | 
    
         
            +
                  response.tap do |r|
         
     | 
| 
       64 
61 
     | 
    
         
             
                    case head
         
     | 
| 
       65 
62 
     | 
    
         
             
                    when /\ACURLER_ERROR/
         
     | 
| 
       66 
63 
     | 
    
         
             
                      # error
         
     | 
| 
       67 
     | 
    
         
            -
                       
     | 
| 
      
 64 
     | 
    
         
            +
                      r.code = 999
         
     | 
| 
       68 
65 
     | 
    
         
             
                    when /\AHTTP/
         
     | 
| 
       69 
66 
     | 
    
         
             
                      # redirect
         
     | 
| 
       70 
67 
     | 
    
         
             
                      location = head.scan(/Location: ([^\r\n]+)/).flatten.last
         
     | 
| 
       71 
     | 
    
         
            -
                       
     | 
| 
      
 68 
     | 
    
         
            +
                      r.uri += location
         
     | 
| 
       72 
69 
     | 
    
         
             
                    else
         
     | 
| 
       73 
     | 
    
         
            -
                      $stderr.puts "unknown cached /head for #{ 
     | 
| 
      
 70 
     | 
    
         
            +
                      $stderr.puts "unknown cached /head for #{r.uri}"
         
     | 
| 
       74 
71 
     | 
    
         
             
                    end
         
     | 
| 
       75 
72 
     | 
    
         
             
                  end
         
     | 
| 
       76 
73 
     | 
    
         
             
                end
         
     | 
| 
       77 
74 
     | 
    
         | 
| 
      
 75 
     | 
    
         
            +
                # helper for decoding bodies before parsing
         
     | 
| 
      
 76 
     | 
    
         
            +
                def self.process_body(response)
         
     | 
| 
      
 77 
     | 
    
         
            +
                  body = response.body
         
     | 
| 
      
 78 
     | 
    
         
            +
             
     | 
| 
      
 79 
     | 
    
         
            +
                  # inflate if necessary
         
     | 
| 
      
 80 
     | 
    
         
            +
                  bits = body[0, 10].force_encoding('BINARY')
         
     | 
| 
      
 81 
     | 
    
         
            +
                  if bits =~ /\A\x1f\x8b/n
         
     | 
| 
      
 82 
     | 
    
         
            +
                    body = Zlib::GzipReader.new(StringIO.new(body)).read
         
     | 
| 
      
 83 
     | 
    
         
            +
                  end
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                  # force to utf-8 if we think this could be text
         
     | 
| 
      
 86 
     | 
    
         
            +
                  if body.encoding != Encoding::UTF_8
         
     | 
| 
      
 87 
     | 
    
         
            +
                    if content_type = response.headers['content-type']
         
     | 
| 
      
 88 
     | 
    
         
            +
                      if content_type =~ /\b(html|javascript|json|text|xml)\b/
         
     | 
| 
      
 89 
     | 
    
         
            +
                        body = body.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
         
     | 
| 
      
 90 
     | 
    
         
            +
                      end
         
     | 
| 
      
 91 
     | 
    
         
            +
                    end
         
     | 
| 
      
 92 
     | 
    
         
            +
                  end
         
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
      
 94 
     | 
    
         
            +
                  body
         
     | 
| 
      
 95 
     | 
    
         
            +
                end
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
       78 
97 
     | 
    
         
             
                #
         
     | 
| 
       79 
98 
     | 
    
         
             
                # accessors
         
     | 
| 
       80 
99 
     | 
    
         
             
                #
         
     | 
| 
         @@ -7,6 +7,7 @@ module Sinew 
     | 
|
| 
       7 
7 
     | 
    
         
             
                attr_accessor :retries
         
     | 
| 
       8 
8 
     | 
    
         
             
                attr_accessor :rate_limit
         
     | 
| 
       9 
9 
     | 
    
         
             
                attr_accessor :headers
         
     | 
| 
      
 10 
     | 
    
         
            +
                attr_accessor :httparty_options
         
     | 
| 
       10 
11 
     | 
    
         
             
                attr_accessor :before_generate_cache_key
         
     | 
| 
       11 
12 
     | 
    
         | 
| 
       12 
13 
     | 
    
         
             
                def initialize
         
     | 
| 
         @@ -15,6 +16,7 @@ module Sinew 
     | 
|
| 
       15 
16 
     | 
    
         
             
                  self.headers = {
         
     | 
| 
       16 
17 
     | 
    
         
             
                    'User-Agent' => "sinew/#{VERSION}",
         
     | 
| 
       17 
18 
     | 
    
         
             
                  }
         
     | 
| 
      
 19 
     | 
    
         
            +
                  self.httparty_options = {}
         
     | 
| 
       18 
20 
     | 
    
         
             
                  self.before_generate_cache_key = ->(i) { i }
         
     | 
| 
       19 
21 
     | 
    
         | 
| 
       20 
22 
     | 
    
         
             
                  # for testing
         
     | 
    
        data/lib/sinew/version.rb
    CHANGED
    
    
    
        data/test/test_helper.rb
    CHANGED
    
    | 
         @@ -12,8 +12,6 @@ require 'sinew' 
     | 
|
| 
       12 
12 
     | 
    
         | 
| 
       13 
13 
     | 
    
         
             
            class MiniTest::Test
         
     | 
| 
       14 
14 
     | 
    
         
             
              TMP = '/tmp/_test_sinew'.freeze
         
     | 
| 
       15 
     | 
    
         
            -
              RECIPE = "#{TMP}/test.sinew".freeze
         
     | 
| 
       16 
     | 
    
         
            -
              CSV = "#{TMP}/test.csv".freeze
         
     | 
| 
       17 
15 
     | 
    
         
             
              HTML = File.read("#{__dir__}/test.html")
         
     | 
| 
       18 
16 
     | 
    
         | 
| 
       19 
17 
     | 
    
         
             
              def setup
         
     | 
| 
         @@ -27,16 +25,10 @@ class MiniTest::Test 
     | 
|
| 
       27 
25 
     | 
    
         
             
              end
         
     | 
| 
       28 
26 
     | 
    
         | 
| 
       29 
27 
     | 
    
         
             
              def sinew
         
     | 
| 
       30 
     | 
    
         
            -
                @sinew ||= Sinew::Main.new(cache: TMP, quiet: true, recipe:  
     | 
| 
      
 28 
     | 
    
         
            +
                @sinew ||= Sinew::Main.new(cache: TMP, quiet: true, recipe: "#{TMP}/ignore.sinew")
         
     | 
| 
       31 
29 
     | 
    
         
             
              end
         
     | 
| 
       32 
30 
     | 
    
         
             
              protected :sinew
         
     | 
| 
       33 
31 
     | 
    
         | 
| 
       34 
     | 
    
         
            -
              def run_recipe(recipe)
         
     | 
| 
       35 
     | 
    
         
            -
                File.write(RECIPE, recipe)
         
     | 
| 
       36 
     | 
    
         
            -
                sinew.run
         
     | 
| 
       37 
     | 
    
         
            -
              end
         
     | 
| 
       38 
     | 
    
         
            -
              protected :run_recipe
         
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
32 
     | 
    
         
             
              def test_network?
         
     | 
| 
       41 
33 
     | 
    
         
             
                !!ENV['SINEW_TEST_NETWORK']
         
     | 
| 
       42 
34 
     | 
    
         
             
              end
         
     | 
| 
         @@ -50,6 +42,7 @@ class MiniTest::Test 
     | 
|
| 
       50 
42 
     | 
    
         
             
                stub_request(:get, %r{http://[^/]+/status/\d+}).to_return(method(:respond_status))
         
     | 
| 
       51 
43 
     | 
    
         
             
                stub_request(:get, %r{http://[^/]+/(relative-)?redirect/\d+}).to_return(method(:respond_redirect))
         
     | 
| 
       52 
44 
     | 
    
         
             
                stub_request(:get, %r{http://[^/]+/delay/\d+}).to_timeout
         
     | 
| 
      
 45 
     | 
    
         
            +
                stub_request(:get, %r{http://[^/]+/xml}).to_return(method(:respond_xml))
         
     | 
| 
       53 
46 
     | 
    
         
             
              end
         
     | 
| 
       54 
47 
     | 
    
         
             
              protected :stub_network
         
     | 
| 
       55 
48 
     | 
    
         | 
| 
         @@ -58,7 +51,7 @@ class MiniTest::Test 
     | 
|
| 
       58 
51 
     | 
    
         
             
              #
         
     | 
| 
       59 
52 
     | 
    
         | 
| 
       60 
53 
     | 
    
         
             
              def respond_html(_request)
         
     | 
| 
       61 
     | 
    
         
            -
                # this html was carefully chosen to match httpbin.org/html
         
     | 
| 
      
 54 
     | 
    
         
            +
                # this html was carefully chosen to somewhat match httpbin.org/html
         
     | 
| 
       62 
55 
     | 
    
         
             
                html = <<~EOF
         
     | 
| 
       63 
56 
     | 
    
         
             
                  <body>
         
     | 
| 
       64 
57 
     | 
    
         
             
                    <h1>Herman Melville - Moby-Dick</h1>
         
     | 
| 
         @@ -68,20 +61,37 @@ class MiniTest::Test 
     | 
|
| 
       68 
61 
     | 
    
         
             
              end
         
     | 
| 
       69 
62 
     | 
    
         
             
              protected :respond_html
         
     | 
| 
       70 
63 
     | 
    
         | 
| 
      
 64 
     | 
    
         
            +
              def respond_xml(_request)
         
     | 
| 
      
 65 
     | 
    
         
            +
                # this xml was carefully chosen to somewhat match httpbin.org/xml
         
     | 
| 
      
 66 
     | 
    
         
            +
                xml = <<~EOF
         
     | 
| 
      
 67 
     | 
    
         
            +
                  <!--   A SAMPLE set of slides   -->
         
     | 
| 
      
 68 
     | 
    
         
            +
                  <slideshow>
         
     | 
| 
      
 69 
     | 
    
         
            +
                    <slide type="all">
         
     | 
| 
      
 70 
     | 
    
         
            +
                      <title>Wake up to WonderWidgets!</title>
         
     | 
| 
      
 71 
     | 
    
         
            +
                    </slide>
         
     | 
| 
      
 72 
     | 
    
         
            +
                    <slide type="all">
         
     | 
| 
      
 73 
     | 
    
         
            +
                      <title>Overview</title>
         
     | 
| 
      
 74 
     | 
    
         
            +
                    </slide>
         
     | 
| 
      
 75 
     | 
    
         
            +
                  </slideshow>
         
     | 
| 
      
 76 
     | 
    
         
            +
                EOF
         
     | 
| 
      
 77 
     | 
    
         
            +
                { body: xml }
         
     | 
| 
      
 78 
     | 
    
         
            +
              end
         
     | 
| 
      
 79 
     | 
    
         
            +
              protected :respond_xml
         
     | 
| 
      
 80 
     | 
    
         
            +
             
     | 
| 
       71 
81 
     | 
    
         
             
              def respond_echo(request)
         
     | 
| 
       72 
82 
     | 
    
         
             
                response = {}
         
     | 
| 
       73 
83 
     | 
    
         
             
                response[:headers] = request.headers
         
     | 
| 
       74 
84 
     | 
    
         | 
| 
       75 
85 
     | 
    
         
             
                # args
         
     | 
| 
       76 
86 
     | 
    
         
             
                response[:args] = if request.uri.query
         
     | 
| 
       77 
     | 
    
         
            -
                  CGI.parse(request.uri.query).map { |k, v| [k, v.first] }.to_h
         
     | 
| 
      
 87 
     | 
    
         
            +
                  CGI.parse(request.uri.query).map { |k, v| [ k, v.first ] }.to_h
         
     | 
| 
       78 
88 
     | 
    
         
             
                else
         
     | 
| 
       79 
89 
     | 
    
         
             
                  {}
         
     | 
| 
       80 
90 
     | 
    
         
             
                end
         
     | 
| 
       81 
91 
     | 
    
         | 
| 
       82 
92 
     | 
    
         
             
                # form
         
     | 
| 
       83 
93 
     | 
    
         
             
                if request.headers['Content-Type'] == 'application/x-www-form-urlencoded'
         
     | 
| 
       84 
     | 
    
         
            -
                  response[:form] = CGI.parse(request.body).map { |k, v| [k, v.first] }.to_h
         
     | 
| 
      
 94 
     | 
    
         
            +
                  response[:form] = CGI.parse(request.body).map { |k, v| [ k, v.first ] }.to_h
         
     | 
| 
       85 
95 
     | 
    
         
             
                end
         
     | 
| 
       86 
96 
     | 
    
         | 
| 
       87 
97 
     | 
    
         
             
                # json
         
     | 
    
        data/test/test_legacy.rb
    CHANGED
    
    | 
         @@ -12,8 +12,10 @@ class TestLegacy < MiniTest::Test 
     | 
|
| 
       12 
12 
     | 
    
         
             
              end
         
     | 
| 
       13 
13 
     | 
    
         | 
| 
       14 
14 
     | 
    
         
             
              def test_legacy
         
     | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
      
 15 
     | 
    
         
            +
                assert_output(/failed with 999/) do
         
     | 
| 
      
 16 
     | 
    
         
            +
                  sinew.dsl.get('http://eu.httpbin.org/status/500')
         
     | 
| 
      
 17 
     | 
    
         
            +
                  assert_equal "\n", sinew.dsl.raw
         
     | 
| 
      
 18 
     | 
    
         
            +
                end
         
     | 
| 
       17 
19 
     | 
    
         | 
| 
       18 
20 
     | 
    
         
             
                sinew.dsl.get('http://eu.httpbin.org/redirect/3')
         
     | 
| 
       19 
21 
     | 
    
         
             
                assert_equal 'http://eu.httpbin.org/get', sinew.dsl.url
         
     | 
    
        data/test/test_main.rb
    CHANGED
    
    | 
         @@ -1,26 +1,8 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require_relative 'test_helper'
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
              def test_noko
         
     | 
| 
       5 
     | 
    
         
            -
                run_recipe <<~'EOF'
         
     | 
| 
       6 
     | 
    
         
            -
                  get 'http://httpbin.org/html'
         
     | 
| 
       7 
     | 
    
         
            -
                  noko.css("h1").each do |h1|
         
     | 
| 
       8 
     | 
    
         
            -
                    csv_emit(h1: h1.text)
         
     | 
| 
       9 
     | 
    
         
            -
                  end
         
     | 
| 
       10 
     | 
    
         
            -
                EOF
         
     | 
| 
       11 
     | 
    
         
            -
                assert_equal("h1\nHerman Melville - Moby-Dick\n", File.read(CSV))
         
     | 
| 
       12 
     | 
    
         
            -
              end
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
              def test_raw
         
     | 
| 
       15 
     | 
    
         
            -
                run_recipe <<~'EOF'
         
     | 
| 
       16 
     | 
    
         
            -
                  get "http://httpbin.org/html"
         
     | 
| 
       17 
     | 
    
         
            -
                  raw.scan(/<h1>([^<]+)/) do
         
     | 
| 
       18 
     | 
    
         
            -
                    csv_emit(h1: $1)
         
     | 
| 
       19 
     | 
    
         
            -
                  end
         
     | 
| 
       20 
     | 
    
         
            -
                EOF
         
     | 
| 
       21 
     | 
    
         
            -
                assert_equal("h1\nHerman Melville - Moby-Dick\n", File.read(CSV))
         
     | 
| 
       22 
     | 
    
         
            -
              end
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'base64'
         
     | 
| 
       23 
4 
     | 
    
         | 
| 
      
 5 
     | 
    
         
            +
            class TestMain < MiniTest::Test
         
     | 
| 
       24 
6 
     | 
    
         
             
              def test_rate_limit
         
     | 
| 
       25 
7 
     | 
    
         
             
                # true network requests call sleep for timeouts, which interferes with our
         
     | 
| 
       26 
8 
     | 
    
         
             
                # instrumentation of Kernel#sleep
         
     | 
| 
         @@ -43,4 +25,10 @@ class TestMain < MiniTest::Test 
     | 
|
| 
       43 
25 
     | 
    
         
             
                Kernel.send(:alias_method, :sleep, :old_sleep)
         
     | 
| 
       44 
26 
     | 
    
         
             
                Kernel.send(:undef_method, :old_sleep)
         
     | 
| 
       45 
27 
     | 
    
         
             
              end
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
              def test_gunzip
         
     | 
| 
      
 30 
     | 
    
         
            +
                body = Base64.decode64('H4sICBRI61oAA2d1Yi50eHQASy9N4gIAJlqRYgQAAAA=')
         
     | 
| 
      
 31 
     | 
    
         
            +
                body = Sinew::Response.process_body(OpenStruct.new(body: body))
         
     | 
| 
      
 32 
     | 
    
         
            +
                assert_equal 'gub', body.strip
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
       46 
34 
     | 
    
         
             
            end
         
     | 
    
        data/test/test_output.rb
    CHANGED
    
    | 
         @@ -1,25 +1,6 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require_relative 'test_helper'
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            class TestOutput < MiniTest::Test
         
     | 
| 
       4 
     | 
    
         
            -
              def test_output
         
     | 
| 
       5 
     | 
    
         
            -
                sinew.dsl.csv_header(:n, :a, :p)
         
     | 
| 
       6 
     | 
    
         
            -
                sinew.dsl.csv_emit(n: 'n1', a: 'a1')
         
     | 
| 
       7 
     | 
    
         
            -
                sinew.dsl.csv_emit(n: 'n2', a: 'a2')
         
     | 
| 
       8 
     | 
    
         
            -
                assert_equal 2, sinew.output.count
         
     | 
| 
       9 
     | 
    
         
            -
                assert_equal "n,a,p\nn1,a1,\"\"\nn2,a2,\"\"\n", File.read(CSV)
         
     | 
| 
       10 
     | 
    
         
            -
              end
         
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
              def test_implicit_header
         
     | 
| 
       13 
     | 
    
         
            -
                sinew.dsl.csv_emit(name: 'bob', address: 'main')
         
     | 
| 
       14 
     | 
    
         
            -
                assert_equal "name,address\nbob,main\n", File.read(CSV)
         
     | 
| 
       15 
     | 
    
         
            -
              end
         
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
              def test_array_header
         
     | 
| 
       18 
     | 
    
         
            -
                sinew.dsl.csv_header(%i[n a p])
         
     | 
| 
       19 
     | 
    
         
            -
                sinew.dsl.csv_emit(n: 'n1', a: 'a1')
         
     | 
| 
       20 
     | 
    
         
            -
                assert_equal "n,a,p\nn1,a1,\"\"\n", File.read(CSV)
         
     | 
| 
       21 
     | 
    
         
            -
              end
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
4 
     | 
    
         
             
              def test_filenames
         
     | 
| 
       24 
5 
     | 
    
         
             
                sinew = Sinew::Main.new(recipe: 'gub.sinew')
         
     | 
| 
       25 
6 
     | 
    
         
             
                assert_equal 'gub.csv', sinew.output.filename
         
     | 
| 
         @@ -59,6 +40,8 @@ class TestOutput < MiniTest::Test 
     | 
|
| 
       59 
40 
     | 
    
         | 
| 
       60 
41 
     | 
    
         
             
                # strip_html_tags
         
     | 
| 
       61 
42 
     | 
    
         
             
                assert_equal('gub', output.send(:normalize, '<tag>gub</tag>'))
         
     | 
| 
      
 43 
     | 
    
         
            +
                # strip_html_tags and replace with spaces
         
     | 
| 
      
 44 
     | 
    
         
            +
                assert_equal('hello world', output.send(:normalize, '<tag>hello<br>world</tag>'))
         
     | 
| 
       62 
45 
     | 
    
         
             
                # convert_smart_punctuation
         
     | 
| 
       63 
46 
     | 
    
         
             
                assert_equal('"gub"', output.send(:normalize, "\302\223gub\302\224"))
         
     | 
| 
       64 
47 
     | 
    
         
             
                # convert_accented_html_entities
         
     | 
| 
         @@ -0,0 +1,60 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require_relative 'test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class TestRecipe < MiniTest::Test
         
     | 
| 
      
 4 
     | 
    
         
            +
              DIR = File.expand_path('recipes', __dir__)
         
     | 
| 
      
 5 
     | 
    
         
            +
              TEST_SINEW = "#{TMP}/test.sinew".freeze
         
     | 
| 
      
 6 
     | 
    
         
            +
              TEST_CSV = "#{TMP}/test.csv".freeze
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
              def test_recipes
         
     | 
| 
      
 9 
     | 
    
         
            +
                Dir.chdir(DIR) do
         
     | 
| 
      
 10 
     | 
    
         
            +
                  Dir['*.sinew'].sort.each do |filename|
         
     | 
| 
      
 11 
     | 
    
         
            +
                    recipe = IO.read(filename)
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                    # get ready
         
     | 
| 
      
 14 
     | 
    
         
            +
                    IO.write(TEST_SINEW, recipe)
         
     | 
| 
      
 15 
     | 
    
         
            +
                    sinew = Sinew::Main.new(cache: TMP, quiet: true, recipe: TEST_SINEW)
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                    # read OPTIONS
         
     | 
| 
      
 18 
     | 
    
         
            +
                    if options = options_from(recipe)
         
     | 
| 
      
 19 
     | 
    
         
            +
                      options.each do |key, value|
         
     | 
| 
      
 20 
     | 
    
         
            +
                        sinew.options[key] = value
         
     | 
| 
      
 21 
     | 
    
         
            +
                      end
         
     | 
| 
      
 22 
     | 
    
         
            +
                    end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
                    # read OUTPUT
         
     | 
| 
      
 25 
     | 
    
         
            +
                    output = output_from(recipe, filename)
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
                    # run
         
     | 
| 
      
 28 
     | 
    
         
            +
                    sinew.run
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
                    # assert
         
     | 
| 
      
 31 
     | 
    
         
            +
                    csv = IO.read(TEST_CSV)
         
     | 
| 
      
 32 
     | 
    
         
            +
                    assert_equal(output, csv, "Output didn't match for recipes/#{filename}")
         
     | 
| 
      
 33 
     | 
    
         
            +
                  end
         
     | 
| 
      
 34 
     | 
    
         
            +
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
              end
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
      
 37 
     | 
    
         
            +
              def options_from(recipe)
         
     | 
| 
      
 38 
     | 
    
         
            +
                if options = recipe[/^#\s*OPTIONS\s*(\{.*\})/, 1]
         
     | 
| 
      
 39 
     | 
    
         
            +
                  # rubocop:disable Security/Eval
         
     | 
| 
      
 40 
     | 
    
         
            +
                  eval(options)
         
     | 
| 
      
 41 
     | 
    
         
            +
                  # rubocop:enable Security/Eval
         
     | 
| 
      
 42 
     | 
    
         
            +
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
              end
         
     | 
| 
      
 44 
     | 
    
         
            +
              protected :options_from
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
              def output_from(recipe, filename)
         
     | 
| 
      
 47 
     | 
    
         
            +
                lines = recipe.split("\n")
         
     | 
| 
      
 48 
     | 
    
         
            +
                first_line = lines.index { |i| i =~ /^# OUTPUT/ }
         
     | 
| 
      
 49 
     | 
    
         
            +
                if !first_line
         
     | 
| 
      
 50 
     | 
    
         
            +
                  raise "# OUTPUT not found in recipes/#{filename}"
         
     | 
| 
      
 51 
     | 
    
         
            +
                end
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                output = lines[first_line + 1..-1]
         
     | 
| 
      
 54 
     | 
    
         
            +
                output = output.map { |i| i.gsub(/^# /, '') }
         
     | 
| 
      
 55 
     | 
    
         
            +
                output = output.join("\n")
         
     | 
| 
      
 56 
     | 
    
         
            +
                output += "\n"
         
     | 
| 
      
 57 
     | 
    
         
            +
                output
         
     | 
| 
      
 58 
     | 
    
         
            +
              end
         
     | 
| 
      
 59 
     | 
    
         
            +
              protected :output_from
         
     | 
| 
      
 60 
     | 
    
         
            +
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: sinew
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 2.0.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 2.0.2
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Adam Doppelt
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2018-05- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2018-05-03 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: awesome_print
         
     | 
| 
         @@ -186,6 +186,14 @@ files: 
     | 
|
| 
       186 
186 
     | 
    
         
             
            - test/legacy/eu.httpbin.org/redirect,3
         
     | 
| 
       187 
187 
     | 
    
         
             
            - test/legacy/eu.httpbin.org/status,500
         
     | 
| 
       188 
188 
     | 
    
         
             
            - test/legacy/legacy.sinew
         
     | 
| 
      
 189 
     | 
    
         
            +
            - test/recipes/array_header.sinew
         
     | 
| 
      
 190 
     | 
    
         
            +
            - test/recipes/basic.sinew
         
     | 
| 
      
 191 
     | 
    
         
            +
            - test/recipes/dups.sinew
         
     | 
| 
      
 192 
     | 
    
         
            +
            - test/recipes/implicit_header.sinew
         
     | 
| 
      
 193 
     | 
    
         
            +
            - test/recipes/limit.sinew
         
     | 
| 
      
 194 
     | 
    
         
            +
            - test/recipes/noko.sinew
         
     | 
| 
      
 195 
     | 
    
         
            +
            - test/recipes/uri.sinew
         
     | 
| 
      
 196 
     | 
    
         
            +
            - test/recipes/xml.sinew
         
     | 
| 
       189 
197 
     | 
    
         
             
            - test/test.html
         
     | 
| 
       190 
198 
     | 
    
         
             
            - test/test_cache.rb
         
     | 
| 
       191 
199 
     | 
    
         
             
            - test/test_helper.rb
         
     | 
| 
         @@ -193,6 +201,7 @@ files: 
     | 
|
| 
       193 
201 
     | 
    
         
             
            - test/test_main.rb
         
     | 
| 
       194 
202 
     | 
    
         
             
            - test/test_nokogiri_ext.rb
         
     | 
| 
       195 
203 
     | 
    
         
             
            - test/test_output.rb
         
     | 
| 
      
 204 
     | 
    
         
            +
            - test/test_recipes.rb
         
     | 
| 
       196 
205 
     | 
    
         
             
            - test/test_requests.rb
         
     | 
| 
       197 
206 
     | 
    
         
             
            - test/test_utf8.rb
         
     | 
| 
       198 
207 
     | 
    
         
             
            homepage: http://github.com/gurgeous/sinew
         
     | 
| 
         @@ -225,6 +234,14 @@ test_files: 
     | 
|
| 
       225 
234 
     | 
    
         
             
            - test/legacy/eu.httpbin.org/redirect,3
         
     | 
| 
       226 
235 
     | 
    
         
             
            - test/legacy/eu.httpbin.org/status,500
         
     | 
| 
       227 
236 
     | 
    
         
             
            - test/legacy/legacy.sinew
         
     | 
| 
      
 237 
     | 
    
         
            +
            - test/recipes/array_header.sinew
         
     | 
| 
      
 238 
     | 
    
         
            +
            - test/recipes/basic.sinew
         
     | 
| 
      
 239 
     | 
    
         
            +
            - test/recipes/dups.sinew
         
     | 
| 
      
 240 
     | 
    
         
            +
            - test/recipes/implicit_header.sinew
         
     | 
| 
      
 241 
     | 
    
         
            +
            - test/recipes/limit.sinew
         
     | 
| 
      
 242 
     | 
    
         
            +
            - test/recipes/noko.sinew
         
     | 
| 
      
 243 
     | 
    
         
            +
            - test/recipes/uri.sinew
         
     | 
| 
      
 244 
     | 
    
         
            +
            - test/recipes/xml.sinew
         
     | 
| 
       228 
245 
     | 
    
         
             
            - test/test.html
         
     | 
| 
       229 
246 
     | 
    
         
             
            - test/test_cache.rb
         
     | 
| 
       230 
247 
     | 
    
         
             
            - test/test_helper.rb
         
     | 
| 
         @@ -232,5 +249,6 @@ test_files: 
     | 
|
| 
       232 
249 
     | 
    
         
             
            - test/test_main.rb
         
     | 
| 
       233 
250 
     | 
    
         
             
            - test/test_nokogiri_ext.rb
         
     | 
| 
       234 
251 
     | 
    
         
             
            - test/test_output.rb
         
     | 
| 
      
 252 
     | 
    
         
            +
            - test/test_recipes.rb
         
     | 
| 
       235 
253 
     | 
    
         
             
            - test/test_requests.rb
         
     | 
| 
       236 
254 
     | 
    
         
             
            - test/test_utf8.rb
         
     |