brand2csv 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/lib/brand2csv.rb +159 -103
- data/lib/brand2csv/version.rb +1 -1
- data/protocol.2013.05.21.textile +40 -0
- metadata +2 -2
    
        data/History.txt
    CHANGED
    
    
    
        data/lib/brand2csv.rb
    CHANGED
    
    | @@ -1,11 +1,12 @@ | |
| 1 1 | 
             
            #!/usr/bin/env ruby
         | 
| 2 2 | 
             
            # encoding: utf-8
         | 
| 3 | 
            -
             | 
| 3 | 
            +
            require 'rubygems' if /^1\.8/.match(RUBY_VERSION)
         | 
| 4 4 | 
             
            require "brand2csv/version"
         | 
| 5 5 | 
             
            require 'mechanize'
         | 
| 6 6 | 
             
            require 'prettyprint'
         | 
| 7 7 | 
             
            require 'optparse'
         | 
| 8 8 | 
             
            require 'csv'
         | 
| 9 | 
            +
            require 'logger'
         | 
| 9 10 |  | 
| 10 11 | 
             
            module Brand2csv
         | 
| 11 12 |  | 
| @@ -17,12 +18,12 @@ module Brand2csv | |
| 17 18 | 
             
                  # Weitere gesehene Fehler
         | 
| 18 19 | 
             
                BekannteFehler = 
         | 
| 19 20 | 
             
                      ['Das Datum ist ung', # ültig'
         | 
| 20 | 
            -
                       'Erweiterte Suche',
         | 
| 21 21 | 
             
                       'Vereinfachte Trefferliste anzeigen',
         | 
| 22 22 | 
             
                        'Es wurden keine Daten gefunden.',
         | 
| 23 23 | 
             
                        'Die Suchkriterien sind teilweise unzul', # ässig',
         | 
| 24 24 | 
             
                        'Geben Sie mindestens ein Suchkriterium ein',
         | 
| 25 25 | 
             
                        'Die Suche wurde abgebrochen, da die maximale Suchzeit von 60 Sekunden',
         | 
| 26 | 
            +
                       'Erweiterte Suche',
         | 
| 26 27 | 
             
                      ]
         | 
| 27 28 | 
             
                Base_uri = 'https://www.swissreg.ch'
         | 
| 28 29 | 
             
                Start_uri = "#{Base_uri}/srclient/faces/jsp/start.jsp"
         | 
| @@ -41,42 +42,80 @@ module Brand2csv | |
| 41 42 | 
             
                        # "tm_lbl_licensee"], # Lizenznehmer/in
         | 
| 42 43 | 
             
                        "tm_lbl_app_date", # Hinterlegungsdatum
         | 
| 43 44 | 
             
                        ]
         | 
| 45 | 
            +
                # Alle Felder mit sprechenden Namen
         | 
| 46 | 
            +
                # ["id_swissreg:mainContent:id_txf_tm_no", nummer],# Marken Nr
         | 
| 47 | 
            +
                # ["id_swissreg:mainContent:id_txf_app_no", ""],                       # Gesuch Nr.
         | 
| 48 | 
            +
                # ["id_swissreg:mainContent:id_txf_tm_text", marke],
         | 
| 49 | 
            +
                # ["id_swissreg:mainContent:id_txf_applicant", ""],                    # Inhaber/in
         | 
| 50 | 
            +
                # ["id_swissreg:mainContent:id_cbxCountry", "_ALL"], # Auswahl Länder _ALL
         | 
| 51 | 
            +
                # ["id_swissreg:mainContent:id_txf_agent", ""],                         # Vertreter/in
         | 
| 52 | 
            +
                # ["id_swissreg:mainContent:id_txf_licensee", ""], # Lizenznehmer
         | 
| 53 | 
            +
                # ["id_swissreg:mainContent:id_txf_nizza_class", ""], # Nizza Klassifikation Nr.
         | 
| 54 | 
            +
                #      # ["id_swissreg:mainContent:id_txf_appDate", timespan], # Hinterlegungsdatum
         | 
| 55 | 
            +
                # ["id_swissreg:mainContent:id_txf_appDate",  "%s" % timespan] ,
         | 
| 56 | 
            +
                # ["id_swissreg:mainContent:id_txf_expiryDate", ""], # Ablauf Schutzfrist
         | 
| 57 | 
            +
                # Markenart: Individualmarke 1 Kollektivmarke 2 Garantiemarke 3
         | 
| 58 | 
            +
                # ["id_swissreg:mainContent:id_cbxTMTypeGrp", "_ALL"],  # Markenart
         | 
| 59 | 
            +
                # ["id_swissreg:mainContent:id_cbxTMForm", "_ALL"],  # Markentyp
         | 
| 60 | 
            +
                # ["id_swissreg:mainContent:id_cbxTMColorClaim", "_ALL"],  # Farbanspruch
         | 
| 61 | 
            +
                # ["id_swissreg:mainContent:id_txf_pub_date", ""], # Publikationsdatum
         | 
| 44 62 |  | 
| 63 | 
            +
                # info zu Publikationsgrund id_swissreg:mainContent:id_ckbTMPubReason
         | 
| 64 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "1"], #Neueintragungen
         | 
| 65 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "2"], #Berichtigungen
         | 
| 66 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "3"], #Verlängerungen
         | 
| 67 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "4"], #Löschungen
         | 
| 68 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "5"], #Inhaberänderungen
         | 
| 69 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "6"], #Vertreteränderungen
         | 
| 70 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "7"], #Lizenzänderungen
         | 
| 71 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMPubReason", "8"], #Weitere Registeränderungen
         | 
| 72 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMEmptyHits", "0"],  # Leere Trefferliste anzeigen
         | 
| 73 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMState", "1"], # "Hängige Gesuche 1
         | 
| 74 | 
            +
                #      # ["id_swissreg:mainContent:id_ckbTMState", "2"], # "Gelöschte Gesuche 2
         | 
| 75 | 
            +
                # ["id_swissreg:mainContent:id_ckbTMState", "3"], # aktive Marken 3 
         | 
| 76 | 
            +
                #      # ["id_swissreg:mainContent:id_ckbTMState", "4"], # gelöschte Marken 4
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                
         | 
| 45 79 | 
             
                MaxZeilen = 5
         | 
| 46 80 |  | 
| 47 81 | 
             
                attr_accessor :marke
         | 
| 48 82 |  | 
| 49 83 | 
             
                def initialize(timespan)
         | 
| 50 84 | 
             
                  @timespan = timespan
         | 
| 85 | 
            +
                  @marke = nil
         | 
| 86 | 
            +
                  @number = nil
         | 
| 87 | 
            +
                  @hitsPerPage = 100
         | 
| 88 | 
            +
                  
         | 
| 51 89 | 
             
                  @agent = Mechanize.new { |agent|
         | 
| 52 | 
            -
                  #  agent.user_agent_alias = 'Mac Safari'
         | 
| 53 90 | 
             
                    agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:16.0) Gecko/20100101 Firefox/16.0'
         | 
| 54 | 
            -
                  #  agent.redirection_limit   = 5
         | 
| 55 91 | 
             
                    agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
         | 
| 92 | 
            +
                    FileUtils.makedirs 'mechanize' if $VERBOSE
         | 
| 93 | 
            +
                    agent.log = Logger.new("mechanize/mechanize.log") if $VERBOSE
         | 
| 56 94 | 
             
                  }
         | 
| 57 95 | 
             
                  @results = []
         | 
| 58 96 | 
             
                  @errors  = Hash.new
         | 
| 59 | 
            -
                  @lastResponse = nil
         | 
| 60 97 | 
             
                  @lastDetail =nil
         | 
| 61 98 | 
             
                  @counterDetails = 0
         | 
| 62 | 
            -
                   | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 99 | 
            +
                  if false # force some values
         | 
| 100 | 
            +
                    # asp* => 138 records werden geholt
         | 
| 101 | 
            +
                    # a* => Es wurden 25,490 Treffer gefunden. Davon werden 10000 zufällig ausgewählte Schutztitel angezeigt. Bitte schränken Sie Ihre Suche weiter ein.
         | 
| 102 | 
            +
                    #       Ab 501 Treffer wird eine vereinfachte Trefferliste angezeigt.  
         | 
| 103 | 
            +
                    # asp* => 138 records werden geholt
         | 
| 67 104 |  | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
            #      @marke = "*WEIH*"
         | 
| 73 | 
            -
             | 
| 105 | 
            +
                    @marke = 'zzzyyzzzzyzzyz*' # => Fehlermeldung: Es wurden keine Daten gefunden
         | 
| 106 | 
            +
                    @marke = 'aspira' 
         | 
| 107 | 
            +
                    # @number = '500000' # für Weihnachten
         | 
| 108 | 
            +
                    @number = ' 601416' # für aspira
         | 
| 109 | 
            +
              #      @marke = "*WEIH*"
         | 
| 110 | 
            +
                    @timespan = nil
         | 
| 111 | 
            +
                  end
         | 
| 112 | 
            +
                  @marke = 'asp*'
         | 
| 74 113 | 
             
                end
         | 
| 75 114 |  | 
| 76 | 
            -
                def writeResponse(filename | 
| 77 | 
            -
                  if defined?(RSpec)
         | 
| 115 | 
            +
                def writeResponse(filename)
         | 
| 116 | 
            +
                  if defined?(RSpec) or $VERBOSE
         | 
| 78 117 | 
             
                    ausgabe = File.open(filename, 'w+')
         | 
| 79 | 
            -
                    ausgabe.puts body
         | 
| 118 | 
            +
                    ausgabe.puts @agent.page.body
         | 
| 80 119 | 
             
                    ausgabe.close
         | 
| 81 120 | 
             
                  else
         | 
| 82 121 | 
             
                    puts "Skipping writing #{filename}" if $VERBOSE
         | 
| @@ -84,11 +123,12 @@ module Brand2csv | |
| 84 123 | 
             
                end
         | 
| 85 124 |  | 
| 86 125 | 
             
                def view_state(response)
         | 
| 87 | 
            -
                  if match | 
| 88 | 
            -
                    match[ | 
| 126 | 
            +
                  if /^1\.8/.match(RUBY_VERSION)
         | 
| 127 | 
            +
                    match = /javax.faces.ViewState.*?value="([^"]+)"/u.match(response)
         | 
| 89 128 | 
             
                  else
         | 
| 90 | 
            -
                    ""
         | 
| 129 | 
            +
                    match = /javax.faces.ViewState.*?value="([^"]+)"/u.match(response.force_encoding('utf-8'))
         | 
| 91 130 | 
             
                  end
         | 
| 131 | 
            +
                  match ? match[1] : ""
         | 
| 92 132 | 
             
                end
         | 
| 93 133 |  | 
| 94 134 | 
             
                def checkErrors(body)
         | 
| @@ -104,10 +144,14 @@ module Brand2csv | |
| 104 144 | 
             
                def parse_swissreg(timespan = @timespan,  # sollte 377 Treffer ergeben, für 01.06.2007-10.06.2007, 559271 wurde in diesem Zeitraum registriert
         | 
| 105 145 | 
             
                                  marke = @marke,    
         | 
| 106 146 | 
             
                                  nummer =@number) #  nummer = "559271" ergibt genau einen treffer
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                  # discard this first response
         | 
| 149 | 
            +
                  # swissreg.ch could not handle cookie by redirect.
         | 
| 150 | 
            +
                  # HTTP status code is also strange at redirection.
         | 
| 107 151 | 
             
                  @agent.get Start_uri  # get a cookie for the session
         | 
| 108 152 | 
             
                  content = @agent.get_file Start_uri
         | 
| 109 | 
            -
                   | 
| 110 | 
            -
                   | 
| 153 | 
            +
                  writeResponse('mechanize/start.jsp')
         | 
| 154 | 
            +
                  # get only view state
         | 
| 111 155 | 
             
                  @state = view_state(content)
         | 
| 112 156 | 
             
                  data = [
         | 
| 113 157 | 
             
                    ["autoScroll", "0,0"],
         | 
| @@ -116,10 +160,12 @@ module Brand2csv | |
| 116 160 | 
             
                    ["id_swissreg:_idcl", "id_swissreg_sub_nav_ipiNavigation_item0"],
         | 
| 117 161 | 
             
                    ["javax.faces.ViewState", @state],
         | 
| 118 162 | 
             
                  ]
         | 
| 119 | 
            -
             | 
| 163 | 
            +
                  
         | 
| 120 164 | 
             
                  content = @agent.post(Start_uri, data)  
         | 
| 121 | 
            -
                  writeResponse('mechanize/ | 
| 122 | 
            -
             | 
| 165 | 
            +
                  writeResponse('mechanize/start2.jsp')
         | 
| 166 | 
            +
                  # Navigation with mechanize like this fails and returns to the home page
         | 
| 167 | 
            +
                  # @agent.page.link_with(:id => "id_swissreg_sub_nav_ipiNavigation_item0").click
         | 
| 168 | 
            +
                  
         | 
| 123 169 | 
             
                  data = [
         | 
| 124 170 | 
             
                    ["autoScroll", "0,0"],
         | 
| 125 171 | 
             
                    ["id_swissreg:_link_hidden_", ""],
         | 
| @@ -130,64 +176,52 @@ module Brand2csv | |
| 130 176 | 
             
                  # sr1 ist die einfache suche, sr3 die erweiterte Suche
         | 
| 131 177 | 
             
                  @path = "/srclient/faces/jsp/trademark/sr3.jsp"
         | 
| 132 178 | 
             
                  response = @agent.post(Base_uri + @path, data)
         | 
| 133 | 
            -
                  writeResponse('mechanize/ | 
| 134 | 
            -
                   | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
             | 
| 138 | 
            -
             | 
| 139 | 
            -
             | 
| 140 | 
            -
             | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
             | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 | 
            -
             | 
| 168 | 
            -
             | 
| 169 | 
            -
             | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
| 172 | 
            -
             | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 175 | 
            -
             | 
| 176 | 
            -
             | 
| 177 | 
            -
             | 
| 178 | 
            -
             | 
| 179 | 
            -
                      puts "nummer ist #{timespan}" if nummer
         | 
| 180 | 
            -
                      @criteria <<   ["id_swissreg:mainContent:sub_fieldset:id_submit", "suchen"]
         | 
| 181 | 
            -
                      @criteria <<    ["id_swissreg_SUBMIT", "1"]
         | 
| 182 | 
            -
                      @criteria <<    ["id_swissreg:_idcl", ""]
         | 
| 183 | 
            -
                      @criteria <<    ["id_swissreg:_link_hidden_", ""]
         | 
| 184 | 
            -
                      @criteria <<    ["javax.faces.ViewState", @state]
         | 
| 185 | 
            -
                      
         | 
| 186 | 
            -
                  @path = "/srclient/faces/jsp/trademark/sr3.jsp"
         | 
| 187 | 
            -
                  response = @agent.post(Base_uri + @path, @criteria)
         | 
| 188 | 
            -
                  writeResponse('mechanize/resultate_1.html', response.body)
         | 
| 189 | 
            -
                  checkErrors(response.body)
         | 
| 190 | 
            -
                  @lastResponse = response
         | 
| 179 | 
            +
                  writeResponse('mechanize/sr3.jsp')
         | 
| 180 | 
            +
                  
         | 
| 181 | 
            +
                  # Fill out form values
         | 
| 182 | 
            +
                  @agent.page.form('id_swissreg').checkboxes.each{ |box| 
         | 
| 183 | 
            +
                                              TMChoiceFields.index(box.value) ? box.check : box.uncheck 
         | 
| 184 | 
            +
                                              box.check if $VERBOSE
         | 
| 185 | 
            +
                                              # select all publication reasons
         | 
| 186 | 
            +
                                              box.check if /id_ckbTMPubReason/.match(box.name)
         | 
| 187 | 
            +
                                              # select all publication states
         | 
| 188 | 
            +
                                              box.check if /id_ckbTMState/.match(box.name)
         | 
| 189 | 
            +
                                            }
         | 
| 190 | 
            +
                  if $VERBOSE # and false # fill all details for marke  567120        
         | 
| 191 | 
            +
                    # Felder, welche nie bei der Antwort auftauchen
         | 
| 192 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_licensee') { |x| x.value = 'BBB Inc*' }
         | 
| 193 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_expiryDate') { |x| x.value = timespan }
         | 
| 194 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_pub_date') { |x| x.value = timespan }
         | 
| 195 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_nizza_class') { |x| x.value = '9' }      
         | 
| 196 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_agent') { |x| x.value = 'Marc Stucki*' }
         | 
| 197 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxCountry') { |x| x.value = 'CH' }  # 'CH' or '_ALL'
         | 
| 198 | 
            +
             | 
| 199 | 
            +
                    # Felder, welche im Resultat angezeigt werden
         | 
| 200 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_applicant') { |x| x.value = 'ASP ATON*' } #inhaber
         | 
| 201 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_no') { |x| x.value = "567120" }
         | 
| 202 | 
            +
                    @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_app_no') { |x| x.value = '50329/2008' }
         | 
| 203 | 
            +
                  end
         | 
| 204 | 
            +
                  
         | 
| 205 | 
            +
                  # Feld, welches im Resultat angezeigt wird
         | 
| 206 | 
            +
                  @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_tm_text') { |x| x.value = "asp*" }
         | 
| 207 | 
            +
                  
         | 
| 208 | 
            +
                  # Felder, welches nie bei der Antwort auftaucht. Ein Versuch .gsub('.', '%2E') schlug ebenfalls fehl!
         | 
| 209 | 
            +
                  @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_txf_appDate') { |x| x.value = timespan}
         | 
| 210 | 
            +
                  
         | 
| 211 | 
            +
                  # Feld, welches ebenfalls berücksichtigt wird
         | 
| 212 | 
            +
                  @agent.page.form('id_swissreg').field(:name => 'id_swissreg:mainContent:id_cbxHitsPerPage') { |x| x.value = @hitsPerPage }
         | 
| 213 | 
            +
                  @agent.page.form('id_swissreg').field(:name => 'autoScroll') { |x| x.value = '0,0' }
         | 
| 214 | 
            +
                  
         | 
| 215 | 
            +
                  if $VERBOSE
         | 
| 216 | 
            +
                    puts "State of searchForm is:"
         | 
| 217 | 
            +
                    @agent.page.form('id_swissreg').fields.each{ |f| puts "field: #{f.name}: #{f.value}"}  
         | 
| 218 | 
            +
                    @agent.page.form('id_swissreg').checkboxes.each{ |box| puts "#{box.name} checked? #{box.checked}"} 
         | 
| 219 | 
            +
                  end
         | 
| 220 | 
            +
                  
         | 
| 221 | 
            +
                  @agent.page.form('id_swissreg').click_button(@agent.page.form('id_swissreg').button_with(:value => "suchen"))
         | 
| 222 | 
            +
                  # Hier sollten eigentlich alle Felder auftauchen, wie
         | 
| 223 | 
            +
                  # Marke=asp*; Land (Inhaber/in)=Schweiz; Markenart=Alle; Markentyp=Alle; Farbanspruch=Alle; Publikationsgrund= Neueintragungen, Berichtigungen, Verlängerungen, Löschungen, Inhaberänderungen, Vertreteränderungen, Lizenzänderungen, Weitere Registeränderungen; Status= hängige Gesuche, aktive Marken
         | 
| 224 | 
            +
                  writeResponse('mechanize/result.jsp')
         | 
| 191 225 | 
             
                end
         | 
| 192 226 |  | 
| 193 227 | 
             
                def parseAddress(nummer, zeilen)
         | 
| @@ -197,15 +231,15 @@ module Brand2csv | |
| 197 231 | 
             
                  # Search for plz/address
         | 
| 198 232 | 
             
                  1.upto(zeilen.length-1).each  {
         | 
| 199 233 | 
             
                    |cnt|
         | 
| 200 | 
            -
             | 
| 201 | 
            -
             | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 234 | 
            +
                      if    m = AddressRegexp.match(zeilen[cnt])
         | 
| 235 | 
            +
                        zeilen[cnt+1] = nil
         | 
| 236 | 
            +
                        plz = m[1]; ort = m[2]
         | 
| 237 | 
            +
                        cnt.upto(MaxZeilen-1).each{ |cnt2| zeilen[cnt2] = nil }
         | 
| 238 | 
            +
                        break
         | 
| 239 | 
            +
                      end
         | 
| 206 240 | 
             
                  }
         | 
| 207 241 | 
             
                  unless plz
         | 
| 208 | 
            -
                    puts "Achtung! Konnte Marke #{nummer} mit Inhaber #{zeilen. | 
| 242 | 
            +
                    puts "Achtung! Konnte Marke #{nummer} mit Inhaber #{zeilen.inspect} nicht parsen" if $VERBOSE
         | 
| 209 243 | 
             
                    return nil,   nil,     nil,     nil,     nil,     nil,     nil, nil
         | 
| 210 244 | 
             
                  end
         | 
| 211 245 | 
             
                  # search for lines with only digits
         | 
| @@ -239,7 +273,7 @@ module Brand2csv | |
| 239 273 | 
             
                    url = "https://www.swissreg.ch/srclient/faces/jsp/trademark/sr300.jsp?language=de&section=tm&id=#{nummer}"
         | 
| 240 274 | 
             
                    pp "Opening #{url}" if $VERBOSE
         | 
| 241 275 | 
             
                    content = @agent.get_file url
         | 
| 242 | 
            -
                    writeResponse("mechanize/detail_#{nummer}.html" | 
| 276 | 
            +
                    writeResponse("mechanize/detail_#{nummer}.html")
         | 
| 243 277 | 
             
                    doc = Nokogiri::Slop(content)
         | 
| 244 278 | 
             
                  end
         | 
| 245 279 | 
             
                  puts "Bitte um Geduld. Holte Adressdetails für Marke #{nummer}. (#{@counterDetails} von #{@errors.size})"
         | 
| @@ -271,7 +305,9 @@ module Brand2csv | |
| 271 305 | 
             
                  if filename
         | 
| 272 306 | 
             
                    doc = Nokogiri::Slop(File.open(filename))        
         | 
| 273 307 | 
             
                  else
         | 
| 274 | 
            -
             | 
| 308 | 
            +
                    body = @agent.page.body
         | 
| 309 | 
            +
                    body.force_encoding('utf-8')
         | 
| 310 | 
            +
                    doc = Nokogiri::Slop(body)
         | 
| 275 311 | 
             
                  end
         | 
| 276 312 | 
             
                  nrFailures = 0
         | 
| 277 313 | 
             
                  counter += 1
         | 
| @@ -322,9 +358,8 @@ module Brand2csv | |
| 322 358 | 
             
                    ]
         | 
| 323 359 | 
             
                    TMChoiceFields.each{ | field2display| data << ["id_swissreg:mainContent:id_sub_options_result:id_ckbTMChoice", field2display] }
         | 
| 324 360 | 
             
                    response = @agent.post(Base_uri + @path, data)
         | 
| 325 | 
            -
                    writeResponse("mechanize/resultate_#{counter}.html" | 
| 361 | 
            +
                    writeResponse("mechanize/resultate_#{counter}.html")
         | 
| 326 362 | 
             
                    checkErrors(response.body)
         | 
| 327 | 
            -
                    @lastResponse = response
         | 
| 328 363 | 
             
                    fetchresult(nil, counter)
         | 
| 329 364 | 
             
                  else
         | 
| 330 365 | 
             
                    puts "Es gab #{nrFailures} Fehler beim Lesen von #{filename}"  if $VERBOSE
         | 
| @@ -334,15 +369,36 @@ module Brand2csv | |
| 334 369 |  | 
| 335 370 | 
             
                def emitCsv(filename='ausgabe.csv')
         | 
| 336 371 | 
             
                  return if @results.size == 0
         | 
| 337 | 
            -
                   | 
| 338 | 
            -
             | 
| 339 | 
            -
             | 
| 340 | 
            -
             | 
| 341 | 
            -
                    @results.each{ | | 
| 372 | 
            +
                  if /^1\.8/.match(RUBY_VERSION)
         | 
| 373 | 
            +
                    ausgabe = File.open(filename, 'w+')
         | 
| 374 | 
            +
                    # Write header
         | 
| 375 | 
            +
                    s=''
         | 
| 376 | 
            +
                    @results[0].members.each { |member| s += member + ';' }
         | 
| 377 | 
            +
                    ausgabe.puts s.chop
         | 
| 378 | 
            +
                    # write all line
         | 
| 379 | 
            +
                    @results.each{ 
         | 
| 380 | 
            +
                      |result| 
         | 
| 381 | 
            +
                        s = ''
         | 
| 382 | 
            +
                        result.members.each{ |member| 
         | 
| 383 | 
            +
                                              unless eval("result.#{member}") 
         | 
| 384 | 
            +
                                                s += ';'
         | 
| 385 | 
            +
                                              else
         | 
| 386 | 
            +
                                                value = eval("result.#{member.to_s}")
         | 
| 387 | 
            +
                                                value = "\"#{value}\"" if value.index(';')
         | 
| 388 | 
            +
                                                s += value + ';' 
         | 
| 389 | 
            +
                                              end
         | 
| 390 | 
            +
                                           }
         | 
| 391 | 
            +
                        ausgabe.puts s.chop
         | 
| 392 | 
            +
                    }        
         | 
| 393 | 
            +
                  else
         | 
| 394 | 
            +
                    CSV.open(filename,  'w', :headers=>@results[0].members,
         | 
| 395 | 
            +
                                              :write_headers => true,
         | 
| 396 | 
            +
                                              :col_sep => ';'
         | 
| 397 | 
            +
                                            ) do |csv| @results.each{ |x| csv << x }
         | 
| 398 | 
            +
                    end
         | 
| 342 399 | 
             
                  end
         | 
| 343 | 
            -
                  puts "Speicherte #{@results.size} gefunden Datensätze für die Zeitspanne '#{@timespan}' in #{filename}"
         | 
| 344 400 | 
             
                end
         | 
| 345 | 
            -
             | 
| 401 | 
            +
                  
         | 
| 346 402 | 
             
                def fetchMissingDetails
         | 
| 347 403 | 
             
                  @errors.each{ 
         | 
| 348 404 | 
             
                    |markennummer, info|
         | 
    
        data/lib/brand2csv/version.rb
    CHANGED
    
    
    
        data/protocol.2013.05.21.textile
    CHANGED
    
    | @@ -1,3 +1,43 @@ | |
| 1 | 
            +
            * Work on 2013.05.22
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            ** Use timespan as filename
         | 
| 4 | 
            +
            ** corrected missing bin/brand2csv for gem
         | 
| 5 | 
            +
            ** Better handling of address lines like '90 route de Frontenex', 'Via San Salvatore, 2'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            Trimmed the script for more logging in VERBOSE mode. Therefore, to debug, I always run the following command:
         | 
| 8 | 
            +
            @rm -rf mechanize/; time ruby -v bin/brand2csv 05.09.2001@
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            * Verified the following things
         | 
| 11 | 
            +
            ** Get/Post request via HttpFox (see logs/protocol_swissreg.log)
         | 
| 12 | 
            +
            ** Content of POST-Data via mechanize and HttpFox (identical except)
         | 
| 13 | 
            +
            ** Tagged this commit as "POST_verified"
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            * "Cheat sheet":http://www.e-tobi.net/blog/files/ruby-mechanize-cheat-sheet.pdf
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            Found that the following fields are handled correctly:
         | 
| 18 | 
            +
            # id_txf_applicant # Inhaber
         | 
| 19 | 
            +
            # id_txf_tm_no # Marken-Nummer
         | 
| 20 | 
            +
            # id_txf_app_no # Gesuch-nummer 
         | 
| 21 | 
            +
            # id_txf_tm_text # text of trademark
         | 
| 22 | 
            +
            # id_cbxHitsPerPage 
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            Fields that are not handled and do not influence the response:
         | 
| 25 | 
            +
            # id_txf_appDate # Hinterlegungsdatum
         | 
| 26 | 
            +
            # id_txf_licensee 
         | 
| 27 | 
            +
            # id_txf_expiryDate
         | 
| 28 | 
            +
            # id_txf_pub_date
         | 
| 29 | 
            +
            # id_txf_nizza_class
         | 
| 30 | 
            +
            # id_txf_agent        # Vertreter
         | 
| 31 | 
            +
            # id_cbxCountry       # Land 'CH' or '_ALL'
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            * Reworked brand2csv.rb to match interactive use more closely
         | 
| 34 | 
            +
            Checked the differences between the fields received via mechanize and via interactive use (see subdirectory logs)
         | 
| 35 | 
            +
            * start.jsp: Match okay
         | 
| 36 | 
            +
            * start2.jsp: Match okay
         | 
| 37 | 
            +
            * sr3.jsp: are quite different
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            * Work on 2013.05.21
         | 
| 40 | 
            +
             | 
| 1 41 | 
             
            * Added spike_mechanize_swissreg.rb
         | 
| 2 42 |  | 
| 3 43 | 
             
            This attempt also works if I specify a trademark name or number, but it does not work when specifying a timespan for the "Hinterlegungsdatum".
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: brand2csv
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1. | 
| 4 | 
            +
              version: 0.1.5
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2013-05- | 
| 12 | 
            +
            date: 2013-05-23 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: mechanize
         |