url_finder 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7514dce4f92bf06c8d4de7ea64778e43f5edde852e529365c7e55b61353df6cc
4
+ data.tar.gz: 563a39ca2f31130388a9ed2bedb56417f49196c0e11fdb31f1d83aaf99df6485
5
+ SHA512:
6
+ metadata.gz: d78fb982fa14ae0006752baa5d82441b09b3a676305405c42cc662794841c39fbb9aa679cb01f42e111d6e467cedb91b5233d568bd360bf005bd77c6580b3cc9
7
+ data.tar.gz: 805618369adfbb8bf1ab2ae067cb1f19392ee6b30cc58b36d21318924e3d6a8023b0df8f0302b291140c3e6b2bb9fa7f8d38e9f84fc2e89e5f30328202f573ae
data/.byebug_history ADDED
@@ -0,0 +1,3 @@
1
+ exit
2
+ finder.urls.class
3
+ finder.urls
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+
13
+ # This is a library so don't include the lock file
14
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,6 @@
1
+ inherit_from: .ruby-style-guide.yml
2
+ AllCops:
3
+ UseCache: true
4
+ CacheRootDirectory: tmp
5
+ Exclude:
6
+ - 'tmp/*.rb'
@@ -0,0 +1,265 @@
1
+ Rails:
2
+ Enabled: false
3
+ AllCops:
4
+ TargetRubyVersion: 2.3
5
+ Exclude:
6
+ - "vendor/**/*"
7
+ UseCache: true
8
+ Style/CollectionMethods:
9
+ Description: Preferred collection methods.
10
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#map-find-select-reduce-size
11
+ Enabled: true
12
+ PreferredMethods:
13
+ collect: map
14
+ collect!: map!
15
+ find: detect
16
+ find_all: select
17
+ reduce: inject
18
+ Style/RedundantFreeze:
19
+ Description: "Checks usages of Object#freeze on immutable objects."
20
+ Enabled: false
21
+ Layout/DotPosition:
22
+ Description: Checks the position of the dot in multi-line method calls.
23
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#consistent-multi-line-chains
24
+ Enabled: true
25
+ EnforcedStyle: trailing
26
+ SupportedStyles:
27
+ - leading
28
+ - trailing
29
+ Naming/FileName:
30
+ Description: Use snake_case for source file names.
31
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#snake-case-files
32
+ Enabled: false
33
+ Exclude: []
34
+ Naming/MemoizedInstanceVariableName:
35
+ Description: Memoized method name should match memo instance variable name.
36
+ Enabled: false
37
+ Naming/UncommunicativeMethodParamName:
38
+ Description: >-
39
+ Checks for method parameter names that contain capital letters,
40
+ end in numbers, or do not meet a minimal length.
41
+ Enabled: false
42
+ Style/GuardClause:
43
+ Description: Check for conditionals that can be replaced with guard clauses
44
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-nested-conditionals
45
+ Enabled: true
46
+ MinBodyLength: 3
47
+ Style/IfUnlessModifier:
48
+ Description: Favor modifier if/unless usage when you have a single-line body.
49
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#if-as-a-modifier
50
+ Enabled: false
51
+ Style/OptionHash:
52
+ Description: Don't use option hashes when you can use keyword arguments.
53
+ Enabled: false
54
+ Style/PercentLiteralDelimiters:
55
+ Description: Use `%`-literal delimiters consistently
56
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#percent-literal-braces
57
+ Enabled: false
58
+ PreferredDelimiters:
59
+ "%": "()"
60
+ "%i": "()"
61
+ "%q": "()"
62
+ "%Q": "()"
63
+ "%r": "{}"
64
+ "%s": "()"
65
+ "%w": "()"
66
+ "%W": "()"
67
+ "%x": "()"
68
+ Naming/PredicateName:
69
+ Description: Check the names of predicate methods.
70
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#bool-methods-qmark
71
+ Enabled: true
72
+ NamePrefix:
73
+ - is_
74
+ - has_
75
+ - have_
76
+ NamePrefixBlacklist:
77
+ - is_
78
+ Exclude:
79
+ - spec/**/*
80
+ Style/RaiseArgs:
81
+ Description: Checks the arguments passed to raise/fail.
82
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#exception-class-messages
83
+ Enabled: false
84
+ EnforcedStyle: exploded
85
+ SupportedStyles:
86
+ - compact
87
+ - exploded
88
+ Style/SignalException:
89
+ Description: Checks for proper usage of fail and raise.
90
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#fail-method
91
+ Enabled: false
92
+ EnforcedStyle: semantic
93
+ SupportedStyles:
94
+ - only_raise
95
+ - only_fail
96
+ - semantic
97
+ Style/SingleLineBlockParams:
98
+ Description: Enforces the names of some block params.
99
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#reduce-blocks
100
+ Enabled: false
101
+ Methods:
102
+ - reduce:
103
+ - a
104
+ - e
105
+ - inject:
106
+ - a
107
+ - e
108
+ Style/TrivialAccessors:
109
+ Enabled: false
110
+ Style/SingleLineMethods:
111
+ Description: Avoid single-line methods.
112
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-single-line-methods
113
+ Enabled: false
114
+ AllowIfMethodIsEmpty: true
115
+ Style/StringLiterals:
116
+ Description: Checks if uses of quotes match the configured preference.
117
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#consistent-string-literals
118
+ Enabled: true
119
+ EnforcedStyle: single_quotes
120
+ SupportedStyles:
121
+ - single_quotes
122
+ - double_quotes
123
+ Style/MixinUsage:
124
+ Enabled: true
125
+ Exclude:
126
+ - exe/*
127
+ Style/StringLiteralsInInterpolation:
128
+ Description: Checks if uses of quotes inside expressions in interpolated strings
129
+ match the configured preference.
130
+ Enabled: true
131
+ EnforcedStyle: single_quotes
132
+ SupportedStyles:
133
+ - single_quotes
134
+ - double_quotes
135
+ Style/TrailingCommaInArrayLiteral:
136
+ Description: Checks for trailing comma in parameter lists and literals.
137
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas
138
+ Enabled: true
139
+ EnforcedStyleForMultiline: comma
140
+ Style/TrailingCommaInHashLiteral:
141
+ Description: Checks for trailing comma in parameter lists and literals.
142
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas
143
+ Enabled: true
144
+ EnforcedStyleForMultiline: comma
145
+ Metrics/AbcSize:
146
+ Description: A calculated magnitude based on number of assignments, branches, and
147
+ conditions.
148
+ Enabled: false
149
+ Max: 15
150
+ Metrics/ClassLength:
151
+ Description: Avoid classes longer than 100 lines of code.
152
+ Enabled: false
153
+ CountComments: false
154
+ Max: 100
155
+ Metrics/ModuleLength:
156
+ CountComments: false
157
+ Max: 100
158
+ Description: Avoid modules longer than 100 lines of code.
159
+ Enabled: false
160
+ Metrics/CyclomaticComplexity:
161
+ Description: A complexity metric that is strongly correlated to the number of test
162
+ cases needed to validate a method.
163
+ Enabled: false
164
+ Max: 6
165
+ Metrics/MethodLength:
166
+ Description: Avoid methods longer than 10 lines of code.
167
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#short-methods
168
+ Enabled: false
169
+ CountComments: false
170
+ Max: 10
171
+ Metrics/ParameterLists:
172
+ Description: Avoid parameter lists longer than three or four parameters.
173
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#too-many-params
174
+ Enabled: false
175
+ Max: 5
176
+ CountKeywordArgs: true
177
+ Metrics/PerceivedComplexity:
178
+ Description: A complexity metric geared towards measuring complexity for a human
179
+ reader.
180
+ Enabled: false
181
+ Max: 7
182
+ Metrics/LineLength:
183
+ Description: Maximum line length
184
+ Enabled: true
185
+ Max: 95
186
+ Exclude:
187
+ - exe/url_finder
188
+ - lib/url_finder/cli.rb
189
+ - Gemfile
190
+ - url_finder.gemspec
191
+ - spec/**/*
192
+ Metrics/BlockLength:
193
+ Enabled: true
194
+ Exclude:
195
+ - lib/url_finder/cli/*
196
+ - spec/**/*
197
+ Lint/AssignmentInCondition:
198
+ Description: Don't use assignment in conditions.
199
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#safe-assignment-in-condition
200
+ Enabled: false
201
+ AllowSafeAssignment: true
202
+ Style/InlineComment:
203
+ Description: Avoid inline comments.
204
+ Enabled: false
205
+ Naming/AccessorMethodName:
206
+ Description: Check the naming of accessor methods for get_/set_.
207
+ Enabled: false
208
+ Style/Alias:
209
+ Description: Use alias_method instead of alias.
210
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#alias-method
211
+ Enabled: false
212
+ Style/Documentation:
213
+ Description: Document classes and non-namespace modules.
214
+ Enabled: false
215
+ Style/DoubleNegation:
216
+ Description: Checks for uses of double negation (!!).
217
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-bang-bang
218
+ Enabled: false
219
+ Style/EachWithObject:
220
+ Description: Prefer `each_with_object` over `inject` or `reduce`.
221
+ Enabled: false
222
+ Style/EmptyLiteral:
223
+ Description: Prefer literals to Array.new/Hash.new/String.new.
224
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#literal-array-hash
225
+ Enabled: false
226
+ Style/ModuleFunction:
227
+ Description: Checks for usage of `extend self` in modules.
228
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#module-function
229
+ Enabled: false
230
+ Style/OneLineConditional:
231
+ Description: Favor the ternary operator(?:) over if/then/else/end constructs.
232
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#ternary-operator
233
+ Enabled: false
234
+ Style/PerlBackrefs:
235
+ Description: Avoid Perl-style regex back references.
236
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-perl-regexp-last-matchers
237
+ Enabled: false
238
+ Style/Send:
239
+ Description: Prefer `Object#__send__` or `Object#public_send` to `send`, as `send`
240
+ may overlap with existing methods.
241
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#prefer-public-send
242
+ Enabled: false
243
+ Style/SpecialGlobalVars:
244
+ Description: Avoid Perl-style global variables.
245
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-cryptic-perlisms
246
+ Enabled: false
247
+ Style/VariableInterpolation:
248
+ Description: Don't interpolate global, instance and class variables directly in
249
+ strings.
250
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#curlies-interpolate
251
+ Enabled: false
252
+ Style/WhenThen:
253
+ Description: Use when x then ... for one-line cases.
254
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#one-line-cases
255
+ Enabled: false
256
+ Lint/EachWithObjectArgument:
257
+ Description: Check for immutable argument given to each_with_object.
258
+ Enabled: true
259
+ Lint/HandleExceptions:
260
+ Description: Don't suppress exception.
261
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#dont-hide-exceptions
262
+ Enabled: false
263
+ Lint/LiteralInInterpolation:
264
+ Description: Checks for literals used in interpolation.
265
+ Enabled: false
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.0
5
+ - 2.5.1
6
+ before_install: gem install bundler -v 1.16.2
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
+
7
+ # Specify your gem's dependencies in url_finder.gemspec
8
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Jacob Burenstam
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # UrlFinder
2
+
3
+ Find URLs in various file formats - supports markdown, HTML, CSV and regular text.
4
+
5
+ - [Usage](#usage)
6
+ - [CLI](#cli)
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'url_finder'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install url_finder
23
+
24
+ ## Usage
25
+
26
+ Find URLs in a file; the format is inferred from the file extension
27
+
28
+ ```ruby
29
+ UrlFinder.from_file('README.md').each do |url|
30
+ puts "Found: #{url}"
31
+ end
32
+ ```
33
+
34
+ You can explicitly pass the format if the file lacks an extension
35
+
36
+ ```ruby
37
+ UrlFinder.from_file('README', 'md').each do |url|
38
+ puts "Found: #{url}"
39
+ end
40
+ ```
41
+
42
+ Supported formats are `markdown` (aliased as `md`), `html`, `csv` and `string`.
43
+
44
+ Find URLs in a string
45
+ ```ruby
46
+ html = '<a href="http://example.com">example.com</a>'
47
+ UrlFinder.from(html, 'html').each do |url|
48
+ puts "Found: #{url}"
49
+ end
50
+ ```
51
+
52
+ ## CLI
53
+
54
+ ```
55
+ Usage: url_finder --help
56
+ --file=/path/to/file Input file
57
+ --format=file_format Input format (html, markdown, csv, string)
58
+ -h, --help How to use
59
+ ```
60
+
61
+ ## Wish list
62
+
63
+ - Better CSV support
64
+ + Current support is extremely crude and makes a lot of assumptions about the file.
65
+ - RDoc support
66
+
67
+ ## Development
68
+
69
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
70
+
71
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
72
+
73
+ ## Contributing
74
+
75
+ Bug reports and pull requests are welcome on GitHub at https://github.com/buren/url_finder.
76
+
77
+ ## License
78
+
79
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'url_finder'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/url_finder ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # for dev purposes
5
+ require 'bundler/setup' if ENV['URL_FINDER_GEM_DEV']
6
+
7
+ require 'url_finder'
8
+ require 'optparse'
9
+
10
# Parsed command line options, filled in by the OptionParser callbacks below.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: url_finder --help'
  opts.default_argv = ARGV

  opts.on('--file=/path/to/file', String, 'Input file') do |string|
    options[:file] = string
  end

  opts.on('--format=file_format', String, 'Input format (html, markdown, csv, string)') do |string|
    options[:format] = string
  end

  opts.on('-h', '--help', 'How to use') do
    puts opts
    exit
  end
end

# Report bad options as a short message plus usage instead of a backtrace.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  abort("#{e.message}\n\n#{parser}")
end

# --file is mandatory; abort prints to stderr and exits with a non-zero status.
file = options.fetch(:file) { abort("--file is required\n\n#{parser}") }
# Optional; when nil UrlFinder.from_file infers the format from the extension.
file_format = options[:format]

# Print every found URL on its own line.
UrlFinder.from_file(file, file_format).each do |url|
  puts url
end
data/lib/url_finder.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'url_finder/version'
4
+ require 'url_finder/reader'
5
+
6
# Gem namespace, exposes the entry points for finding URLs in files and strings
module UrlFinder
  # Find URLs in file
  #
  # When no format is given it is inferred from the file extension using
  # File.extname, so dots in directory names are ignored and Pathname
  # arguments are accepted.
  #
  # @param [String, Pathname] path to file
  # @param [String, nil] file_format
  #   of file, if nil the file format will be inferred from the
  #   file extension (markdown, md, html, csv, string, txt)
  # @return [BaseReader] enumerable reader yielding the found URLs
  # @raise [ArgumentError] if the format is unknown
  def self.from_file(path, file_format = nil)
    # File.extname returns e.g. ".md" (or "" when there is no extension)
    file_format ||= File.extname(path).delete('.')

    from(File.read(path), file_format)
  end

  # Find URLs in string
  # @param [String] content string
  # @param [String] file_format of string (markdown, md, html, csv, string, txt)
  # @return [BaseReader] enumerable reader yielding the found URLs
  # @raise [ArgumentError] if the format is unknown
  def self.from(content, file_format)
    Reader.new(content, file_format).urls
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'url_finder/readers/csv_reader'
4
+ require 'url_finder/readers/html_reader'
5
+ require 'url_finder/readers/markdown_reader'
6
+ require 'url_finder/readers/string_reader'
7
+
8
module UrlFinder
  # Handles reader delegation: maps a file format name to a reader class
  class Reader
    # Lower-case format name => reader class used for that format
    FORMAT_READERS = {
      'markdown' => MarkdownReader,
      'md' => MarkdownReader,
      'html' => HTMLReader,
      'csv' => CSVReader,
      'string' => StringReader,
      'txt' => StringReader,
    }.freeze

    # The raw content
    attr_reader :content

    # Instantiates reader
    # @param content the raw content to find URLs in
    # @param file_format name of the format, matched case-insensitively
    def initialize(content, file_format)
      @file_format = file_format
      @content = content
    end

    # Returns the file format as a downcased string
    # @return [String] the file format
    def file_format
      format_name = @file_format.to_s
      format_name.downcase
    end

    # Builds the appropriate reader for the given file format or raises error
    # @return [BaseReader] instance of a base reader subclass
    # @raise [ArgumentError] if no reader is registered for the file format
    def urls
      format_name = file_format
      reader_klass = FORMAT_READERS[format_name]
      raise(ArgumentError, "unknown format #{format_name}") if reader_klass.nil?

      reader_klass.new(content)
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module UrlFinder
  # Base class for reader implementations.
  #
  # Subclasses provide #urls; iteration, the emptiness check and array
  # conversion are all built on top of that method.
  class BaseReader
    include Enumerable

    class << self
      # Alias for .new
      # @return [BaseReader] instance of BaseReader
      def urls(*args)
        new(*args)
      end
    end

    # The content URLs are searched in
    attr_reader :content

    # Initialize reader
    # @param [String] content string to find URLs in
    def initialize(content)
      @urls = nil
      @content = content
    end

    # Yield each url
    # @see Enumerable#each
    def each(&blk)
      urls.each(&blk)
    end

    # @raise [NotImplementedError] always, subclasses must override this method
    def urls
      raise NotImplementedError, 'subclass must implement!'
    end

    # Returns true if no URLs were found
    # @return [true, false] true if no URLs were found
    def empty?
      found = urls
      found.empty?
    end

    # Returns the URLs as an array
    # @return [Array<String>] the found URLs
    def to_a
      urls
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'url_finder/readers/base_reader'
5
+
6
module UrlFinder
  # Find URLs in CSV string, the first column of every row is used
  class CSVReader < BaseReader
    # Returns the found URLs, parsed once and then cached
    # @return [Array<String>] the first cell of each row, nil cells dropped
    def urls
      return @urls if @urls

      rows = CSV.parse(content)
      @urls = rows.map { |row| row[0] }.compact
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'url_finder/readers/base_reader'
5
+
6
module UrlFinder
  # Find URLs in HTML strings
  class HTMLReader < BaseReader
    # Returns the found URLs, i.e. the href attribute of every anchor element
    # that has one.
    #
    # The document is parsed inside the memoized expression so repeated calls
    # (each, empty?, to_a all go through #urls) do not re-parse the content.
    #
    # @return [Array<String>] the found URLs
    def urls
      @urls ||= Nokogiri::HTML(content).css('a').map { |e| e['href'] }.compact
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'kramdown'
4
+ require 'url_finder/readers/base_reader'
5
+ require 'url_finder/readers/html_reader'
6
+
7
module UrlFinder
  # Find URLs in Markdown strings
  class MarkdownReader < BaseReader
    # Returns the found URLs by converting the Markdown to HTML and delegating
    # to HTMLReader.
    #
    # The conversion happens inside the memoized block so repeated calls
    # (each, empty?, to_a all go through #urls) do not re-render the content.
    #
    # @return [Array<String>] the found URLs
    def urls
      @urls ||= begin
        html = Kramdown::Document.new(content).to_html
        HTMLReader.new(html).urls
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+ require 'url_finder/readers/base_reader'
5
+
6
module UrlFinder
  # Find URLs in plain strings
  class StringReader < BaseReader
    # Returns the found URLs, extracted once with URI.extract and then cached
    # @return [Array<String>] the found URLs
    def urls
      return @urls if @urls

      @urls = URI.extract(content)
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module UrlFinder
  # Version of the url_finder gem, frozen so it cannot be mutated at runtime
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'url_finder/version'
6
+
7
# Gem specification for url_finder.
Gem::Specification.new do |spec|
  spec.name = 'url_finder'
  spec.version = UrlFinder::VERSION
  spec.authors = ['Jacob Burenstam']
  spec.email = ['burenstam@gmail.com']

  spec.summary = 'Find URLs in common file formats (Markdown, HTML, CSV, string).'
  spec.description = 'Find URLs in common file formats (Markdown, HTML, CSV, string) with ease - Ruby and CLI.'
  spec.homepage = 'https://github.com/buren/url_finder'
  spec.license = 'MIT'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # Test directories and the debugger history file are left out of the package.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      f.match(%r{^(test|spec|features)/}) || f == '.byebug_history'
    end
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # TODO: Consider making kramdown & nokogiri optional
  spec.add_dependency 'kramdown', '~> 1.17'
  spec.add_dependency 'nokogiri', '~> 1.8'

  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'byebug'
  spec.add_development_dependency 'yard'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
metadata ADDED
@@ -0,0 +1,166 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jacob Burenstam
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-08-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: kramdown
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.17'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.17'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.16'
55
+ - !ruby/object:Gem::Dependency
56
+ name: byebug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: yard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '10.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '10.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.0'
111
+ description: Find URLs in common file formats (Markdown, HTML, CSV, string) with ease
112
+ - Ruby and CLI.
113
+ email:
114
+ - burenstam@gmail.com
115
+ executables:
116
+ - url_finder
117
+ extensions: []
118
+ extra_rdoc_files: []
119
+ files:
120
+ - ".byebug_history"
121
+ - ".gitignore"
122
+ - ".rspec"
123
+ - ".rubocop.yml"
124
+ - ".ruby-style-guide.yml"
125
+ - ".travis.yml"
126
+ - Gemfile
127
+ - LICENSE.txt
128
+ - README.md
129
+ - Rakefile
130
+ - bin/console
131
+ - bin/setup
132
+ - exe/url_finder
133
+ - lib/url_finder.rb
134
+ - lib/url_finder/reader.rb
135
+ - lib/url_finder/readers/base_reader.rb
136
+ - lib/url_finder/readers/csv_reader.rb
137
+ - lib/url_finder/readers/html_reader.rb
138
+ - lib/url_finder/readers/markdown_reader.rb
139
+ - lib/url_finder/readers/string_reader.rb
140
+ - lib/url_finder/version.rb
141
+ - url_finder.gemspec
142
+ homepage: https://github.com/buren/url_finder
143
+ licenses:
144
+ - MIT
145
+ metadata: {}
146
+ post_install_message:
147
+ rdoc_options: []
148
+ require_paths:
149
+ - lib
150
+ required_ruby_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ required_rubygems_version: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ requirements: []
161
+ rubyforge_project:
162
+ rubygems_version: 2.7.6
163
+ signing_key:
164
+ specification_version: 4
165
+ summary: Find URLs in common file formats (Markdown, HTML, CSV, string).
166
+ test_files: []