geo_combine 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +53 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +20 -0
- data/.rubocop_todo.yml +165 -0
- data/Gemfile +2 -1
- data/README.md +20 -2
- data/Rakefile +4 -2
- data/bin/geocombine +1 -0
- data/geo_combine.gemspec +6 -1
- data/lib/geo_combine/bounding_box.rb +7 -1
- data/lib/geo_combine/ckan_metadata.rb +10 -8
- data/lib/geo_combine/cli.rb +3 -1
- data/lib/geo_combine/esri_open_data.rb +2 -0
- data/lib/geo_combine/exceptions.rb +3 -0
- data/lib/geo_combine/fgdc.rb +2 -2
- data/lib/geo_combine/formats.rb +2 -0
- data/lib/geo_combine/formatting.rb +3 -1
- data/lib/geo_combine/geo_blacklight_harvester.rb +21 -13
- data/lib/geo_combine/geoblacklight.rb +20 -6
- data/lib/geo_combine/geometry_types.rb +2 -0
- data/lib/geo_combine/iso19139.rb +2 -1
- data/lib/geo_combine/ogp.rb +13 -11
- data/lib/geo_combine/railtie.rb +2 -0
- data/lib/geo_combine/subjects.rb +2 -0
- data/lib/geo_combine/version.rb +3 -1
- data/lib/geo_combine.rb +4 -3
- data/lib/tasks/geo_combine.rake +50 -29
- data/lib/xslt/fgdc2html.xsl +38 -9
- data/spec/features/fgdc2html_spec.rb +53 -1
- data/spec/features/iso2html_spec.rb +10 -1
- data/spec/fixtures/docs/princeton_fgdc.xml +374 -0
- data/spec/fixtures/docs/repos.json +3224 -0
- data/spec/fixtures/docs/simple_xml.xml +10 -0
- data/spec/fixtures/docs/simple_xslt.xsl +11 -0
- data/spec/fixtures/docs/stanford_iso.xml +652 -0
- data/spec/fixtures/docs/tufts_fgdc.xml +977 -0
- data/spec/fixtures/indexing/basic_geoblacklight.json +27 -0
- data/spec/fixtures/indexing/geoblacklight.json +33 -0
- data/spec/fixtures/indexing/layers.json +16119 -0
- data/spec/fixtures/indexing/test.txt +1 -0
- data/spec/fixtures/json_docs.rb +2 -0
- data/spec/fixtures/xml_docs.rb +9 -1659
- data/spec/helpers.rb +7 -7
- data/spec/lib/geo_combine/bounding_box_spec.rb +18 -0
- data/spec/lib/geo_combine/ckan_metadata_spec.rb +34 -11
- data/spec/lib/geo_combine/esri_open_data_spec.rb +23 -2
- data/spec/lib/geo_combine/fgdc_spec.rb +41 -10
- data/spec/lib/geo_combine/formatting_spec.rb +13 -5
- data/spec/lib/geo_combine/geo_blacklight_harvester_spec.rb +30 -26
- data/spec/lib/geo_combine/geoblacklight_spec.rb +41 -11
- data/spec/lib/geo_combine/iso19139_spec.rb +26 -14
- data/spec/lib/geo_combine/ogp_spec.rb +28 -8
- data/spec/lib/geo_combine_spec.rb +7 -4
- data/spec/lib/tasks/geo_combine_spec.rb +45 -0
- data/spec/spec_helper.rb +19 -84
- data/spec/support/fixtures.rb +9 -0
- metadata +116 -21
- data/.coveralls.yml +0 -1
- data/.travis.yml +0 -8
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 7d47d9cff1e3bf0e3ec2237c65554524b9a624d8c9faf833d1c99c6fdc4f7c94
         | 
| 4 | 
            +
              data.tar.gz: 7069057b0b5166f2ed5496af51a270cefdd6ee5aa3937456f10a54b2f5f32536
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 7208f9b13e73b183571861a7d40bcd6a1c1a466b4af7ff6fead7174bdb992a51955ebb986b07094ce195dd2e1d520a3c8b0b51b7c18178b57b3b0eb6db0c6e4e
         | 
| 7 | 
            +
              data.tar.gz: 7daf75a1d31036a3659d4e96f261f4f1ce20bfea2083827f32e767ee8f15c14d6c112053cd009ca17cdfc6c433b9f7b64ce524fbf4fdaf7fe56877b1b7c07360
         | 
| @@ -0,0 +1,53 @@ | |
| 1 | 
            +
            name: CI
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            on: [push]
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            jobs:
         | 
| 6 | 
            +
              rubocop:
         | 
| 7 | 
            +
                runs-on: ubuntu-latest
         | 
| 8 | 
            +
                steps:
         | 
| 9 | 
            +
                - uses: actions/checkout@v2
         | 
| 10 | 
            +
                - name: Set up Ruby
         | 
| 11 | 
            +
                  uses: ruby/setup-ruby@v1
         | 
| 12 | 
            +
                  with:
         | 
| 13 | 
            +
                    ruby-version: 2.7
         | 
| 14 | 
            +
                - name: Install dependencies
         | 
| 15 | 
            +
                  run: bundle install
         | 
| 16 | 
            +
                - name: Run linter
         | 
| 17 | 
            +
                  run: bundle exec rubocop
         | 
| 18 | 
            +
             | 
| 19 | 
            +
              test:
         | 
| 20 | 
            +
                runs-on: ubuntu-latest
         | 
| 21 | 
            +
                strategy:
         | 
| 22 | 
            +
                  matrix:
         | 
| 23 | 
            +
                    ruby: [2.7, 3.0, 3.1]
         | 
| 24 | 
            +
                    faraday_version: [''] # Defaults to whatever's the most recent version.
         | 
| 25 | 
            +
                    include:
         | 
| 26 | 
            +
                      - ruby: 2.7
         | 
| 27 | 
            +
                        faraday_version: '~> 1.0'
         | 
| 28 | 
            +
                steps:
         | 
| 29 | 
            +
                - uses: actions/checkout@v2
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                - name: Set up Ruby
         | 
| 32 | 
            +
                  uses: ruby/setup-ruby@v1
         | 
| 33 | 
            +
                  with:
         | 
| 34 | 
            +
                    ruby-version: ${{ matrix.ruby }}
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                - name: Install bundler
         | 
| 37 | 
            +
                  run: gem install bundler -v 2.1.1
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                - name: Install dependencies
         | 
| 40 | 
            +
                  run: bundle _2.1.1_ install
         | 
| 41 | 
            +
                  env:
         | 
| 42 | 
            +
                    FARADAY_VERSION: ${{ matrix.faraday_version }}
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                - name: Run tests
         | 
| 45 | 
            +
                  run: bundle exec rake spec
         | 
| 46 | 
            +
                  env:
         | 
| 47 | 
            +
                    FARADAY_VERSION: ${{ matrix.faraday_version }}
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                - name: Upload coverage artifacts
         | 
| 50 | 
            +
                  uses: actions/upload-artifact@v2
         | 
| 51 | 
            +
                  with:
         | 
| 52 | 
            +
                    name: coverage
         | 
| 53 | 
            +
                    path: coverage/
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/.rubocop.yml
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            require:
         | 
| 2 | 
            +
              - rubocop-rspec
         | 
| 3 | 
            +
              - rubocop-rake
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            inherit_from: .rubocop_todo.yml
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            AllCops:
         | 
| 8 | 
            +
              TargetRubyVersion: 2.7
         | 
| 9 | 
            +
              DisplayCopNames: true
         | 
| 10 | 
            +
              NewCops: enable
         | 
| 11 | 
            +
              Exclude:
         | 
| 12 | 
            +
              - 'geo_combine.gemspec'
         | 
| 13 | 
            +
              - 'tmp/**/*'
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            RSpec/DescribeClass:
         | 
| 16 | 
            +
              Enabled: false
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            RSpec/BeforeAfterAll:
         | 
| 19 | 
            +
              Exclude:
         | 
| 20 | 
            +
                - 'spec/lib/tasks/geo_combine_spec.rb'
         | 
    
        data/.rubocop_todo.yml
    ADDED
    
    | @@ -0,0 +1,165 @@ | |
| 1 | 
            +
            # This configuration was generated by
         | 
| 2 | 
            +
            # `rubocop --auto-gen-config`
         | 
| 3 | 
            +
            # on 2022-02-17 18:38:52 UTC using RuboCop version 1.25.1.
         | 
| 4 | 
            +
            # The point is for the user to remove these configuration records
         | 
| 5 | 
            +
            # one by one as the offenses are removed from the code base.
         | 
| 6 | 
            +
            # Note that changes in the inspected code, or installation of new
         | 
| 7 | 
            +
            # versions of RuboCop, may require this file to be generated again.
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Offense count: 1
         | 
| 10 | 
            +
            Lint/RescueException:
         | 
| 11 | 
            +
              Exclude:
         | 
| 12 | 
            +
                - 'spec/helpers.rb'
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            # Offense count: 1
         | 
| 15 | 
            +
            Lint/UselessAssignment:
         | 
| 16 | 
            +
              Exclude:
         | 
| 17 | 
            +
                - 'spec/helpers.rb'
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            # Offense count: 7
         | 
| 20 | 
            +
            # Configuration parameters: IgnoredMethods, CountRepeatedAttributes.
         | 
| 21 | 
            +
            Metrics/AbcSize:
         | 
| 22 | 
            +
              Max: 33
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            # Offense count: 25
         | 
| 25 | 
            +
            # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
         | 
| 26 | 
            +
            # IgnoredMethods: refine
         | 
| 27 | 
            +
            Metrics/BlockLength:
         | 
| 28 | 
            +
              Max: 181
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            # Offense count: 1
         | 
| 31 | 
            +
            # Configuration parameters: CountComments, CountAsOne.
         | 
| 32 | 
            +
            Metrics/ClassLength:
         | 
| 33 | 
            +
              Max: 152
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            # Offense count: 3
         | 
| 36 | 
            +
            # Configuration parameters: IgnoredMethods.
         | 
| 37 | 
            +
            Metrics/CyclomaticComplexity:
         | 
| 38 | 
            +
              Max: 11
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            # Offense count: 10
         | 
| 41 | 
            +
            # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
         | 
| 42 | 
            +
            Metrics/MethodLength:
         | 
| 43 | 
            +
              Max: 21
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            # Offense count: 1
         | 
| 46 | 
            +
            # Configuration parameters: CountComments, CountAsOne.
         | 
| 47 | 
            +
            Metrics/ModuleLength:
         | 
| 48 | 
            +
              Max: 1657
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            # Offense count: 1
         | 
| 51 | 
            +
            # Configuration parameters: IgnoredMethods.
         | 
| 52 | 
            +
            Metrics/PerceivedComplexity:
         | 
| 53 | 
            +
              Max: 11
         | 
| 54 | 
            +
             | 
| 55 | 
            +
            # Offense count: 9
         | 
| 56 | 
            +
            # Configuration parameters: Prefixes.
         | 
| 57 | 
            +
            # Prefixes: when, with, without
         | 
| 58 | 
            +
            RSpec/ContextWording:
         | 
| 59 | 
            +
              Exclude:
         | 
| 60 | 
            +
                - 'spec/lib/geo_combine/geoblacklight_spec.rb'
         | 
| 61 | 
            +
                - 'spec/lib/geo_combine/ogp_spec.rb'
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            # Offense count: 9
         | 
| 64 | 
            +
            # Configuration parameters: CountAsOne.
         | 
| 65 | 
            +
            RSpec/ExampleLength:
         | 
| 66 | 
            +
              Max: 12
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            # Offense count: 4
         | 
| 69 | 
            +
            RSpec/ExpectInHook:
         | 
| 70 | 
            +
              Exclude:
         | 
| 71 | 
            +
                - 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
         | 
| 72 | 
            +
                - 'spec/lib/geo_combine/geoblacklight_spec.rb'
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            # Offense count: 1
         | 
| 75 | 
            +
            # Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
         | 
| 76 | 
            +
            # Include: **/*_spec*rb*, **/spec/**/*
         | 
| 77 | 
            +
            RSpec/FilePath:
         | 
| 78 | 
            +
              Exclude:
         | 
| 79 | 
            +
                - 'spec/lib/geo_combine_spec.rb'
         | 
| 80 | 
            +
             | 
| 81 | 
            +
            # Configuration parameters: .
         | 
| 82 | 
            +
            # SupportedStyles: have_received, receive
         | 
| 83 | 
            +
            RSpec/MessageSpies:
         | 
| 84 | 
            +
              EnforcedStyle: have_received
         | 
| 85 | 
            +
              Exclude:
         | 
| 86 | 
            +
                - 'spec/lib/geo_combine/esri_open_data_spec.rb'
         | 
| 87 | 
            +
                - 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
         | 
| 88 | 
            +
                - 'spec/lib/geo_combine/geoblacklight_spec.rb'
         | 
| 89 | 
            +
                - 'spec/lib/geo_combine/ogp_spec.rb'
         | 
| 90 | 
            +
                - 'spec/lib/geo_combine_spec.rb'
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            # Offense count: 39
         | 
| 93 | 
            +
            RSpec/MultipleExpectations:
         | 
| 94 | 
            +
              Max: 5
         | 
| 95 | 
            +
             | 
| 96 | 
            +
            # Offense count: 3
         | 
| 97 | 
            +
            # Configuration parameters: AllowSubject.
         | 
| 98 | 
            +
            RSpec/MultipleMemoizedHelpers:
         | 
| 99 | 
            +
              Max: 7
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            # Offense count: 5
         | 
| 102 | 
            +
            # Configuration parameters: IgnoreSharedExamples.
         | 
| 103 | 
            +
            RSpec/NamedSubject:
         | 
| 104 | 
            +
              Exclude:
         | 
| 105 | 
            +
                - 'spec/lib/geo_combine/formatting_spec.rb'
         | 
| 106 | 
            +
             | 
| 107 | 
            +
            # Offense count: 8
         | 
| 108 | 
            +
            RSpec/NestedGroups:
         | 
| 109 | 
            +
              Max: 4
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            # Offense count: 1
         | 
| 112 | 
            +
            RSpec/OverwritingSetup:
         | 
| 113 | 
            +
              Exclude:
         | 
| 114 | 
            +
                - 'spec/lib/geo_combine/geoblacklight_spec.rb'
         | 
| 115 | 
            +
             | 
| 116 | 
            +
            # Offense count: 2
         | 
| 117 | 
            +
            RSpec/RepeatedExampleGroupBody:
         | 
| 118 | 
            +
              Exclude:
         | 
| 119 | 
            +
                - 'spec/lib/geo_combine/iso19139_spec.rb'
         | 
| 120 | 
            +
             | 
| 121 | 
            +
            # Offense count: 19
         | 
| 122 | 
            +
            RSpec/StubbedMock:
         | 
| 123 | 
            +
              Exclude:
         | 
| 124 | 
            +
                - 'spec/lib/geo_combine/esri_open_data_spec.rb'
         | 
| 125 | 
            +
                - 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
         | 
| 126 | 
            +
                - 'spec/lib/geo_combine/geoblacklight_spec.rb'
         | 
| 127 | 
            +
                - 'spec/lib/geo_combine/ogp_spec.rb'
         | 
| 128 | 
            +
                - 'spec/lib/geo_combine_spec.rb'
         | 
| 129 | 
            +
             | 
| 130 | 
            +
            # Offense count: 5
         | 
| 131 | 
            +
            RSpec/SubjectStub:
         | 
| 132 | 
            +
              Exclude:
         | 
| 133 | 
            +
                - 'spec/lib/geo_combine/ogp_spec.rb'
         | 
| 134 | 
            +
             | 
| 135 | 
            +
            # Offense count: 1
         | 
| 136 | 
            +
            # Configuration parameters: IgnoreNameless, IgnoreSymbolicNames.
         | 
| 137 | 
            +
            RSpec/VerifiedDoubles:
         | 
| 138 | 
            +
              Exclude:
         | 
| 139 | 
            +
                - 'spec/lib/geo_combine/geo_blacklight_harvester_spec.rb'
         | 
| 140 | 
            +
             | 
| 141 | 
            +
            # Offense count: 1
         | 
| 142 | 
            +
            Security/Open:
         | 
| 143 | 
            +
              Exclude:
         | 
| 144 | 
            +
                - 'lib/geo_combine/geoblacklight.rb'
         | 
| 145 | 
            +
             | 
| 146 | 
            +
            # Offense count: 7
         | 
| 147 | 
            +
            # Configuration parameters: AllowedConstants.
         | 
| 148 | 
            +
            Style/Documentation:
         | 
| 149 | 
            +
              Exclude:
         | 
| 150 | 
            +
                - 'spec/**/*'
         | 
| 151 | 
            +
                - 'test/**/*'
         | 
| 152 | 
            +
                - 'lib/geo_combine/bounding_box.rb'
         | 
| 153 | 
            +
                - 'lib/geo_combine/ckan_metadata.rb'
         | 
| 154 | 
            +
                - 'lib/geo_combine/cli.rb'
         | 
| 155 | 
            +
                - 'lib/geo_combine/geo_blacklight_harvester.rb'
         | 
| 156 | 
            +
                - 'lib/geo_combine/geoblacklight.rb'
         | 
| 157 | 
            +
                - 'lib/geo_combine/geometry_types.rb'
         | 
| 158 | 
            +
                - 'lib/geo_combine/iso19139.rb'
         | 
| 159 | 
            +
             | 
| 160 | 
            +
            # Offense count: 7
         | 
| 161 | 
            +
            # Cop supports --auto-correct.
         | 
| 162 | 
            +
            # Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
         | 
| 163 | 
            +
            # URISchemes: http, https
         | 
| 164 | 
            +
            Layout/LineLength:
         | 
| 165 | 
            +
              Max: 159
         | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -1,6 +1,8 @@ | |
| 1 1 | 
             
            # GeoCombine
         | 
| 2 2 |  | 
| 3 | 
            -
             | 
| 3 | 
            +
               
         | 
| 4 | 
            +
            | []()
         | 
| 5 | 
            +
            | [](https://github.com/OpenGeoMetadata/GeoCombine/releases)
         | 
| 4 6 |  | 
| 5 7 |  | 
| 6 8 | 
             
            A Ruby toolkit for managing geospatial metadata, including:
         | 
| @@ -61,6 +63,12 @@ You can also specify a single repository: | |
| 61 63 | 
             
            $ bundle exec rake geocombine:clone[edu.stanford.purl]
         | 
| 62 64 | 
             
            ```
         | 
| 63 65 |  | 
| 66 | 
            +
            *Note: If you are using zsh, you will need to use escape characters in front of the brackets:*
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            ```sh
         | 
| 69 | 
            +
            $ bundle exec rake geocombine:clone\[edu.stanford.purl\]
         | 
| 70 | 
            +
            ```
         | 
| 71 | 
            +
             | 
| 64 72 | 
             
            #### Update local OpenGeoMetadata repositories
         | 
| 65 73 |  | 
| 66 74 | 
             
            ```sh
         | 
| @@ -75,6 +83,12 @@ You can also specify a single repository: | |
| 75 83 | 
             
            $ bundle exec rake geocombine:pull[edu.stanford.purl]
         | 
| 76 84 | 
             
            ```
         | 
| 77 85 |  | 
| 86 | 
            +
            *Note: If you are using zsh, you will need to use escape characters in front of the brackets:*
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            ```sh
         | 
| 89 | 
            +
            $ bundle exec rake geocombine:pull\[edu.stanford.purl\]
         | 
| 90 | 
            +
            ```
         | 
| 91 | 
            +
             | 
| 78 92 | 
             
            #### Index GeoBlacklight documents
         | 
| 79 93 |  | 
| 80 94 | 
             
            To index into Solr, GeoCombine requires a Solr instance that is running the
         | 
| @@ -105,12 +119,16 @@ $ SOLR_COMMIT_WITHIN=100 bundle exec rake geocombine:index | |
| 105 119 |  | 
| 106 120 | 
             
            GeoCombine provides a Harvester class and rake task to harvest and index content from GeoBlacklight sites (or any site that follows the Blacklight API format). Given that the configurations can change from consumer to consumer and site to site, the class provides a relatively simple configuration API. This can be configured in an initializer, a wrapping rake task, or any other ruby context where the rake task or class would be invoked.
         | 
| 107 121 |  | 
| 122 | 
            +
            ```sh
         | 
| 123 | 
            +
            bundle exec rake geocombine:geoblacklight_harvester:index[YOUR_CONFIGURED_SITE_KEY]
         | 
| 124 | 
            +
            ```
         | 
| 125 | 
            +
             | 
| 108 126 | 
             
            #### Harvester configuration
         | 
| 109 127 |  | 
| 110 128 | 
             
            Only the sites themselves are required to be configured but there are various configuration options that can (optionally) be supplied to modify the harvester's behavior.
         | 
| 111 129 |  | 
| 112 130 | 
             
            ```ruby
         | 
| 113 | 
            -
            GeoCombine:: | 
| 131 | 
            +
            GeoCombine::GeoBlacklightHarvester.configure do
         | 
| 114 132 | 
             
              {
         | 
| 115 133 | 
             
                commit_within: '10000',
         | 
| 116 134 | 
             
                crawl_delay: 1, # All sites
         | 
    
        data/Rakefile
    CHANGED
    
    | @@ -1,6 +1,8 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require 'bundler/gem_tasks'
         | 
| 2 4 |  | 
| 3 | 
            -
            Dir.glob('lib/tasks/*.rake').each { |r| load r}
         | 
| 5 | 
            +
            Dir.glob('lib/tasks/*.rake').each { |r| load r }
         | 
| 4 6 |  | 
| 5 7 | 
             
            desc 'Run console for development'
         | 
| 6 8 | 
             
            task :console do
         | 
| @@ -17,7 +19,7 @@ begin | |
| 17 19 |  | 
| 18 20 | 
             
              RSpec::Core::RakeTask.new(:spec)
         | 
| 19 21 |  | 
| 20 | 
            -
              task : | 
| 22 | 
            +
              task default: :spec
         | 
| 21 23 | 
             
            rescue LoadError
         | 
| 22 24 | 
             
              # no rspec available
         | 
| 23 25 | 
             
            end
         | 
    
        data/bin/geocombine
    CHANGED
    
    
    
        data/geo_combine.gemspec
    CHANGED
    
    | @@ -20,14 +20,19 @@ Gem::Specification.new do |spec| | |
| 20 20 |  | 
| 21 21 | 
             
              spec.add_dependency 'activesupport'
         | 
| 22 22 | 
             
              spec.add_dependency 'rsolr'
         | 
| 23 | 
            -
              spec.add_dependency 'net-http-persistent', '~> 2.0' # pin since faraday (rsolr) doesn't work correctly with 3.x
         | 
| 24 23 | 
             
              spec.add_dependency 'nokogiri'
         | 
| 25 24 | 
             
              spec.add_dependency 'json-schema'
         | 
| 26 25 | 
             
              spec.add_dependency 'sanitize'
         | 
| 27 26 | 
             
              spec.add_dependency 'thor'
         | 
| 27 | 
            +
              spec.add_dependency 'faraday-net_http_persistent', '~> 2.0'
         | 
| 28 28 |  | 
| 29 29 | 
             
              spec.add_development_dependency 'bundler'
         | 
| 30 30 | 
             
              spec.add_development_dependency 'rake'
         | 
| 31 31 | 
             
              spec.add_development_dependency 'rspec'
         | 
| 32 32 | 
             
              spec.add_development_dependency 'rspec-html-matchers'
         | 
| 33 | 
            +
              spec.add_development_dependency 'rubocop', '~> 1.25'
         | 
| 34 | 
            +
              spec.add_development_dependency 'rubocop-rspec', '~> 2.8'
         | 
| 35 | 
            +
              spec.add_development_dependency 'rubocop-rake'
         | 
| 36 | 
            +
              spec.add_development_dependency 'simplecov'
         | 
| 37 | 
            +
              spec.add_development_dependency 'webmock', '~> 3.14'
         | 
| 33 38 | 
             
            end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module GeoCombine
         | 
| 2 4 | 
             
              class BoundingBox
         | 
| 3 5 | 
             
                attr_reader :west, :south, :east, :north
         | 
| @@ -24,11 +26,13 @@ module GeoCombine | |
| 24 26 | 
             
                def valid?
         | 
| 25 27 | 
             
                  [south, north].map do |coord|
         | 
| 26 28 | 
             
                    next if (-90..90).cover?(coord)
         | 
| 29 | 
            +
             | 
| 27 30 | 
             
                    raise GeoCombine::Exceptions::InvalidGeometry,
         | 
| 28 31 | 
             
                          "#{coord} should be in range -90 90"
         | 
| 29 32 | 
             
                  end
         | 
| 30 33 | 
             
                  [east, west].map do |coord|
         | 
| 31 34 | 
             
                    next if (-180..180).cover?(coord)
         | 
| 35 | 
            +
             | 
| 32 36 | 
             
                    raise GeoCombine::Exceptions::InvalidGeometry,
         | 
| 33 37 | 
             
                          "#{coord} should be in range -180 180"
         | 
| 34 38 | 
             
                  end
         | 
| @@ -45,7 +49,8 @@ module GeoCombine | |
| 45 49 |  | 
| 46 50 | 
             
                def self.from_envelope(envelope)
         | 
| 47 51 | 
             
                  return if envelope.nil?
         | 
| 48 | 
            -
             | 
| 52 | 
            +
             | 
| 53 | 
            +
                  envelope = envelope[/.*ENVELOPE\(([^)]*)/, 1].split(',')
         | 
| 49 54 | 
             
                  new(
         | 
| 50 55 | 
             
                    west: envelope[0],
         | 
| 51 56 | 
             
                    south: envelope[3],
         | 
| @@ -59,6 +64,7 @@ module GeoCombine | |
| 59 64 | 
             
                # @param [String] delimiter "," or " "
         | 
| 60 65 | 
             
                def self.from_string_delimiter(spatial, delimiter: ',')
         | 
| 61 66 | 
             
                  return if spatial.nil?
         | 
| 67 | 
            +
             | 
| 62 68 | 
             
                  spatial = spatial.split(delimiter)
         | 
| 63 69 | 
             
                  new(
         | 
| 64 70 | 
             
                    west: spatial[0],
         | 
| @@ -1,8 +1,11 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module GeoCombine
         | 
| 2 4 | 
             
              class CkanMetadata
         | 
| 3 | 
            -
                MAX_STRING_LENGTH =  | 
| 5 | 
            +
                MAX_STRING_LENGTH = 32_765 # Solr limit
         | 
| 4 6 |  | 
| 5 7 | 
             
                attr_reader :metadata
         | 
| 8 | 
            +
             | 
| 6 9 | 
             
                def initialize(metadata)
         | 
| 7 10 | 
             
                  @metadata = JSON.parse(metadata)
         | 
| 8 11 | 
             
                end
         | 
| @@ -31,7 +34,7 @@ module GeoCombine | |
| 31 34 | 
             
                    dc_subject_sm: subjects,
         | 
| 32 35 | 
             
                    dct_references_s: external_references.to_json.to_s,
         | 
| 33 36 | 
             
                    dc_format_s: downloadable? ? 'ZIP' : nil # TODO: we only allow direct ZIP file downloads
         | 
| 34 | 
            -
                  }. | 
| 37 | 
            +
                  }.compact
         | 
| 35 38 | 
             
                end
         | 
| 36 39 |  | 
| 37 40 | 
             
                def organization
         | 
| @@ -54,7 +57,7 @@ module GeoCombine | |
| 54 57 | 
             
                  begin
         | 
| 55 58 | 
             
                    return bbox.to_envelope if bbox.valid?
         | 
| 56 59 | 
             
                  rescue GeoCombine::Exceptions::InvalidGeometry
         | 
| 57 | 
            -
                     | 
| 60 | 
            +
                    nil
         | 
| 58 61 | 
             
                  end
         | 
| 59 62 | 
             
                end
         | 
| 60 63 |  | 
| @@ -66,7 +69,7 @@ module GeoCombine | |
| 66 69 | 
             
                  begin
         | 
| 67 70 | 
             
                    return bbox.to_envelope if bbox.valid?
         | 
| 68 71 | 
             
                  rescue GeoCombine::Exceptions::InvalidGeometry
         | 
| 69 | 
            -
                     | 
| 72 | 
            +
                    nil
         | 
| 70 73 | 
             
                  end
         | 
| 71 74 | 
             
                end
         | 
| 72 75 |  | 
| @@ -87,11 +90,9 @@ module GeoCombine | |
| 87 90 | 
             
                    'http://schema.org/url' => resource_urls('information').first
         | 
| 88 91 | 
             
                  }
         | 
| 89 92 |  | 
| 90 | 
            -
                  if downloadable?
         | 
| 91 | 
            -
                    h['http://schema.org/downloadUrl'] = resource_urls('download').first
         | 
| 92 | 
            -
                  end
         | 
| 93 | 
            +
                  h['http://schema.org/downloadUrl'] = resource_urls('download').first if downloadable?
         | 
| 93 94 |  | 
| 94 | 
            -
                  h. | 
| 95 | 
            +
                  h.compact
         | 
| 95 96 | 
             
                end
         | 
| 96 97 |  | 
| 97 98 | 
             
                def downloadable?
         | 
| @@ -100,6 +101,7 @@ module GeoCombine | |
| 100 101 |  | 
| 101 102 | 
             
                def resources(type)
         | 
| 102 103 | 
             
                  return [] if @metadata['resources'].nil?
         | 
| 104 | 
            +
             | 
| 103 105 | 
             
                  @metadata['resources'].select { |resource| resource['resource_locator_function'] == type }
         | 
| 104 106 | 
             
                end
         | 
| 105 107 |  | 
    
        data/lib/geo_combine/cli.rb
    CHANGED
    
    | @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require 'thor'
         | 
| 2 4 | 
             
            require 'rake'
         | 
| 3 5 |  | 
| @@ -17,7 +19,7 @@ module GeoCombine | |
| 17 19 | 
             
                  Rake::Task['geocombine:pull'].invoke
         | 
| 18 20 | 
             
                end
         | 
| 19 21 |  | 
| 20 | 
            -
                desc  | 
| 22 | 
            +
                desc 'index', 'Index all of the GeoBlacklight documents'
         | 
| 21 23 | 
             
                def index
         | 
| 22 24 | 
             
                  Rake::Task['geocombine:index'].invoke
         | 
| 23 25 | 
             
                end
         | 
    
        data/lib/geo_combine/fgdc.rb
    CHANGED
    
    | @@ -1,9 +1,9 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 | 
            +
            module GeoCombine
         | 
| 3 4 | 
             
              ##
         | 
| 4 5 | 
             
              # FIXME: FGDC parsing, transformations are still experimental
         | 
| 5 6 | 
             
              class Fgdc < Metadata
         | 
| 6 | 
            -
             | 
| 7 7 | 
             
                ##
         | 
| 8 8 | 
             
                # Returns a Nokogiri::XSLT object containing the FGDC to GeoBlacklight XSL
         | 
| 9 9 | 
             
                # @return [Nokogiri::XSLT]
         | 
    
        data/lib/geo_combine/formats.rb
    CHANGED
    
    
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module GeoCombine
         | 
| 2 4 | 
             
              ##
         | 
| 3 5 | 
             
              # Mixin used for formatting metadata fields
         | 
| @@ -28,7 +30,7 @@ module GeoCombine | |
| 28 30 |  | 
| 29 31 | 
             
                # slugs should be lowercase and only have a-z, A-Z, 0-9, and -
         | 
| 30 32 | 
             
                def sluggify(slug)
         | 
| 31 | 
            -
                  slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub( | 
| 33 | 
            +
                  slug.gsub(/[^a-zA-Z0-9\-]/, '-').gsub(/-+/, '-').downcase
         | 
| 32 34 | 
             
                end
         | 
| 33 35 | 
             
              end
         | 
| 34 36 | 
             
            end
         | 
| @@ -30,17 +30,21 @@ module GeoCombine | |
| 30 30 | 
             
                  end
         | 
| 31 31 |  | 
| 32 32 | 
             
                  def document_transformer
         | 
| 33 | 
            -
                    @document_transformer ||  | 
| 33 | 
            +
                    @document_transformer || lambda do |document|
         | 
| 34 34 | 
             
                      document.delete('_version_')
         | 
| 35 35 | 
             
                      document.delete('score')
         | 
| 36 36 | 
             
                      document.delete('timestamp')
         | 
| 37 | 
            +
                      document.delete('solr_bboxtype__minX')
         | 
| 38 | 
            +
                      document.delete('solr_bboxtype__minY')
         | 
| 39 | 
            +
                      document.delete('solr_bboxtype__maxX')
         | 
| 40 | 
            +
                      document.delete('solr_bboxtype__maxY')
         | 
| 37 41 | 
             
                      document
         | 
| 38 42 | 
             
                    end
         | 
| 39 43 | 
             
                  end
         | 
| 40 44 | 
             
                end
         | 
| 41 45 |  | 
| 42 | 
            -
             | 
| 43 46 | 
             
                attr_reader :site, :site_key
         | 
| 47 | 
            +
             | 
| 44 48 | 
             
                def initialize(site_key)
         | 
| 45 49 | 
             
                  @site_key = site_key
         | 
| 46 50 | 
             
                  @site = self.class.config[site_key]
         | 
| @@ -55,7 +59,7 @@ module GeoCombine | |
| 55 59 |  | 
| 56 60 | 
             
                  response_class.new(response: response, base_url: base_url).documents.each do |docs|
         | 
| 57 61 | 
             
                    docs.map! do |document|
         | 
| 58 | 
            -
                      self.class.document_transformer | 
| 62 | 
            +
                      self.class.document_transformer&.call(document)
         | 
| 59 63 | 
             
                    end.compact
         | 
| 60 64 |  | 
| 61 65 | 
             
                    puts "Adding #{docs.count} documents to solr" if self.class.config[:debug]
         | 
| @@ -77,7 +81,8 @@ module GeoCombine | |
| 77 81 | 
             
                    elsif keys.any? && %w[links data].all? { |param| keys.include?(param) }
         | 
| 78 82 | 
             
                      ModernBlacklightResponse
         | 
| 79 83 | 
             
                    else
         | 
| 80 | 
            -
                      raise NotImplementedError, | 
| 84 | 
            +
                      raise NotImplementedError,
         | 
| 85 | 
            +
                            "The following json response was not able to be parsed by the GeoBlacklightHarvester\n#{json}"
         | 
| 81 86 | 
             
                    end
         | 
| 82 87 | 
             
                  end
         | 
| 83 88 | 
             
                end
         | 
| @@ -85,6 +90,7 @@ module GeoCombine | |
| 85 90 | 
             
                class LegacyBlacklightResponse
         | 
| 86 91 | 
             
                  attr_reader :base_url
         | 
| 87 92 | 
             
                  attr_accessor :response, :page
         | 
| 93 | 
            +
             | 
| 88 94 | 
             
                  def initialize(response:, base_url:)
         | 
| 89 95 | 
             
                    @base_url = base_url
         | 
| 90 96 | 
             
                    @response = response
         | 
| @@ -94,16 +100,17 @@ module GeoCombine | |
| 94 100 | 
             
                  def documents
         | 
| 95 101 | 
             
                    return enum_for(:documents) unless block_given?
         | 
| 96 102 |  | 
| 97 | 
            -
                    while current_page && total_pages && (current_page <= total_pages) | 
| 103 | 
            +
                    while current_page && total_pages && (current_page <= total_pages)
         | 
| 98 104 | 
             
                      yield response.dig('response', 'docs')
         | 
| 99 105 |  | 
| 100 106 | 
             
                      break if current_page == total_pages
         | 
| 107 | 
            +
             | 
| 101 108 | 
             
                      self.page += 1
         | 
| 102 109 | 
             
                      puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
         | 
| 103 110 |  | 
| 104 111 | 
             
                      begin
         | 
| 105 112 | 
             
                        self.response = JSON.parse(Net::HTTP.get(URI(url)))
         | 
| 106 | 
            -
                      rescue => e
         | 
| 113 | 
            +
                      rescue StandardError => e
         | 
| 107 114 | 
             
                        puts "Request for #{url} failed with #{e}"
         | 
| 108 115 | 
             
                        self.response = nil
         | 
| 109 116 | 
             
                      end
         | 
| @@ -130,6 +137,7 @@ module GeoCombine | |
| 130 137 | 
             
                class ModernBlacklightResponse
         | 
| 131 138 | 
             
                  attr_reader :base_url
         | 
| 132 139 | 
             
                  attr_accessor :response, :page
         | 
| 140 | 
            +
             | 
| 133 141 | 
             
                  def initialize(response:, base_url:)
         | 
| 134 142 | 
             
                    @base_url = base_url
         | 
| 135 143 | 
             
                    @response = response
         | 
| @@ -146,11 +154,13 @@ module GeoCombine | |
| 146 154 |  | 
| 147 155 | 
             
                      url = response.dig('links', 'next')
         | 
| 148 156 | 
             
                      break unless url
         | 
| 157 | 
            +
             | 
| 158 | 
            +
                      url = "#{url}&format=json"
         | 
| 149 159 | 
             
                      self.page += 1
         | 
| 150 160 | 
             
                      puts "Fetching page #{page} @ #{url}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
         | 
| 151 161 | 
             
                      begin
         | 
| 152 162 | 
             
                        self.response = JSON.parse(Net::HTTP.get(URI(url)))
         | 
| 153 | 
            -
                      rescue => e
         | 
| 163 | 
            +
                      rescue StandardError => e
         | 
| 154 164 | 
             
                        puts "Request for #{url} failed with #{e}"
         | 
| 155 165 | 
             
                        self.response = nil
         | 
| 156 166 | 
             
                      end
         | 
| @@ -162,13 +172,11 @@ module GeoCombine | |
| 162 172 | 
             
                  def documents_from_urls(urls)
         | 
| 163 173 | 
             
                    puts "Fetching #{urls.count} documents for page #{page}" if GeoCombine::GeoBlacklightHarvester.config[:debug]
         | 
| 164 174 | 
             
                    urls.map do |url|
         | 
| 165 | 
            -
                       | 
| 166 | 
            -
             | 
| 167 | 
            -
                       | 
| 168 | 
            -
                        puts "Fetching \"#{url}/raw\" failed with #{e}"
         | 
| 175 | 
            +
                      JSON.parse(Net::HTTP.get(URI("#{url}/raw")))
         | 
| 176 | 
            +
                    rescue StandardError => e
         | 
| 177 | 
            +
                      puts "Fetching \"#{url}/raw\" failed with #{e}"
         | 
| 169 178 |  | 
| 170 | 
            -
             | 
| 171 | 
            -
                      end
         | 
| 179 | 
            +
                      nil
         | 
| 172 180 | 
             
                    end.compact
         | 
| 173 181 | 
             
                  end
         | 
| 174 182 | 
             
                end
         |