url_finder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 7514dce4f92bf06c8d4de7ea64778e43f5edde852e529365c7e55b61353df6cc
4
+ data.tar.gz: 563a39ca2f31130388a9ed2bedb56417f49196c0e11fdb31f1d83aaf99df6485
5
+ SHA512:
6
+ metadata.gz: d78fb982fa14ae0006752baa5d82441b09b3a676305405c42cc662794841c39fbb9aa679cb01f42e111d6e467cedb91b5233d568bd360bf005bd77c6580b3cc9
7
+ data.tar.gz: 805618369adfbb8bf1ab2ae067cb1f19392ee6b30cc58b36d21318924e3d6a8023b0df8f0302b291140c3e6b2bb9fa7f8d38e9f84fc2e89e5f30328202f573ae
data/.byebug_history ADDED
@@ -0,0 +1,3 @@
1
+ exit
2
+ finder.urls.class
3
+ finder.urls
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+
13
+ # This is a library so don't include the lock file
14
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,6 @@
1
+ inherit_from: .ruby-style-guide.yml
2
+ AllCops:
3
+ UseCache: true
4
+ CacheRootDirectory: tmp
5
+ Exclude:
6
+ - 'tmp/*.rb'
@@ -0,0 +1,265 @@
1
+ Rails:
2
+ Enabled: false
3
+ AllCops:
4
+ TargetRubyVersion: 2.3
5
+ Exclude:
6
+ - "vendor/**/*"
7
+ UseCache: true
8
+ Style/CollectionMethods:
9
+ Description: Preferred collection methods.
10
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#map-find-select-reduce-size
11
+ Enabled: true
12
+ PreferredMethods:
13
+ collect: map
14
+ collect!: map!
15
+ find: detect
16
+ find_all: select
17
+ reduce: inject
18
+ Style/RedundantFreeze:
19
+ Description: "Checks usages of Object#freeze on immutable objects."
20
+ Enabled: false
21
+ Layout/DotPosition:
22
+ Description: Checks the position of the dot in multi-line method calls.
23
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#consistent-multi-line-chains
24
+ Enabled: true
25
+ EnforcedStyle: trailing
26
+ SupportedStyles:
27
+ - leading
28
+ - trailing
29
+ Naming/FileName:
30
+ Description: Use snake_case for source file names.
31
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#snake-case-files
32
+ Enabled: false
33
+ Exclude: []
34
+ Naming/MemoizedInstanceVariableName:
35
+ Description: Memoized method name should match memo instance variable name.
36
+ Enabled: false
37
+ Naming/UncommunicativeMethodParamName:
38
+ Description: >-
39
+ Checks for method parameter names that contain capital letters,
40
+ end in numbers, or do not meet a minimal length.
41
+ Enabled: false
42
+ Style/GuardClause:
43
+ Description: Check for conditionals that can be replaced with guard clauses
44
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-nested-conditionals
45
+ Enabled: true
46
+ MinBodyLength: 3
47
+ Style/IfUnlessModifier:
48
+ Description: Favor modifier if/unless usage when you have a single-line body.
49
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#if-as-a-modifier
50
+ Enabled: false
51
+ Style/OptionHash:
52
+ Description: Don't use option hashes when you can use keyword arguments.
53
+ Enabled: false
54
+ Style/PercentLiteralDelimiters:
55
+ Description: Use `%`-literal delimiters consistently
56
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#percent-literal-braces
57
+ Enabled: false
58
+ PreferredDelimiters:
59
+ "%": "()"
60
+ "%i": "()"
61
+ "%q": "()"
62
+ "%Q": "()"
63
+ "%r": "{}"
64
+ "%s": "()"
65
+ "%w": "()"
66
+ "%W": "()"
67
+ "%x": "()"
68
+ Naming/PredicateName:
69
+ Description: Check the names of predicate methods.
70
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#bool-methods-qmark
71
+ Enabled: true
72
+ NamePrefix:
73
+ - is_
74
+ - has_
75
+ - have_
76
+ NamePrefixBlacklist:
77
+ - is_
78
+ Exclude:
79
+ - spec/**/*
80
+ Style/RaiseArgs:
81
+ Description: Checks the arguments passed to raise/fail.
82
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#exception-class-messages
83
+ Enabled: false
84
+ EnforcedStyle: exploded
85
+ SupportedStyles:
86
+ - compact
87
+ - exploded
88
+ Style/SignalException:
89
+ Description: Checks for proper usage of fail and raise.
90
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#fail-method
91
+ Enabled: false
92
+ EnforcedStyle: semantic
93
+ SupportedStyles:
94
+ - only_raise
95
+ - only_fail
96
+ - semantic
97
+ Style/SingleLineBlockParams:
98
+ Description: Enforces the names of some block params.
99
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#reduce-blocks
100
+ Enabled: false
101
+ Methods:
102
+ - reduce:
103
+ - a
104
+ - e
105
+ - inject:
106
+ - a
107
+ - e
108
+ Style/TrivialAccessors:
109
+ Enabled: false
110
+ Style/SingleLineMethods:
111
+ Description: Avoid single-line methods.
112
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-single-line-methods
113
+ Enabled: false
114
+ AllowIfMethodIsEmpty: true
115
+ Style/StringLiterals:
116
+ Description: Checks if uses of quotes match the configured preference.
117
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#consistent-string-literals
118
+ Enabled: true
119
+ EnforcedStyle: single_quotes
120
+ SupportedStyles:
121
+ - single_quotes
122
+ - double_quotes
123
+ Style/MixinUsage:
124
+ Enabled: true
125
+ Exclude:
126
+ - exe/*
127
+ Style/StringLiteralsInInterpolation:
128
+ Description: Checks if uses of quotes inside expressions in interpolated strings
129
+ match the configured preference.
130
+ Enabled: true
131
+ EnforcedStyle: single_quotes
132
+ SupportedStyles:
133
+ - single_quotes
134
+ - double_quotes
135
+ Style/TrailingCommaInArrayLiteral:
136
+ Description: Checks for trailing comma in parameter lists and literals.
137
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas
138
+ Enabled: true
139
+ EnforcedStyleForMultiline: comma
140
+ Style/TrailingCommaInHashLiteral:
141
+ Description: Checks for trailing comma in parameter lists and literals.
142
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas
143
+ Enabled: true
144
+ EnforcedStyleForMultiline: comma
145
+ Metrics/AbcSize:
146
+ Description: A calculated magnitude based on number of assignments, branches, and
147
+ conditions.
148
+ Enabled: false
149
+ Max: 15
150
+ Metrics/ClassLength:
151
+ Description: Avoid classes longer than 100 lines of code.
152
+ Enabled: false
153
+ CountComments: false
154
+ Max: 100
155
+ Metrics/ModuleLength:
156
+ CountComments: false
157
+ Max: 100
158
+ Description: Avoid modules longer than 100 lines of code.
159
+ Enabled: false
160
+ Metrics/CyclomaticComplexity:
161
+ Description: A complexity metric that is strongly correlated to the number of test
162
+ cases needed to validate a method.
163
+ Enabled: false
164
+ Max: 6
165
+ Metrics/MethodLength:
166
+ Description: Avoid methods longer than 10 lines of code.
167
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#short-methods
168
+ Enabled: false
169
+ CountComments: false
170
+ Max: 10
171
+ Metrics/ParameterLists:
172
+ Description: Avoid parameter lists longer than three or four parameters.
173
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#too-many-params
174
+ Enabled: false
175
+ Max: 5
176
+ CountKeywordArgs: true
177
+ Metrics/PerceivedComplexity:
178
+ Description: A complexity metric geared towards measuring complexity for a human
179
+ reader.
180
+ Enabled: false
181
+ Max: 7
182
+ Metrics/LineLength:
183
+ Description: Maximum line length
184
+ Enabled: true
185
+ Max: 95
186
+ Exclude:
187
+ - exe/url_finder
188
+ - lib/url_finder/cli.rb
189
+ - Gemfile
190
+ - url_finder.gemspec
191
+ - spec/**/*
192
+ Metrics/BlockLength:
193
+ Enabled: true
194
+ Exclude:
195
+ - lib/url_finder/cli/*
196
+ - spec/**/*
197
+ Lint/AssignmentInCondition:
198
+ Description: Don't use assignment in conditions.
199
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#safe-assignment-in-condition
200
+ Enabled: false
201
+ AllowSafeAssignment: true
202
+ Style/InlineComment:
203
+ Description: Avoid inline comments.
204
+ Enabled: false
205
+ Naming/AccessorMethodName:
206
+ Description: Check the naming of accessor methods for get_/set_.
207
+ Enabled: false
208
+ Style/Alias:
209
+ Description: Use alias_method instead of alias.
210
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#alias-method
211
+ Enabled: false
212
+ Style/Documentation:
213
+ Description: Document classes and non-namespace modules.
214
+ Enabled: false
215
+ Style/DoubleNegation:
216
+ Description: Checks for uses of double negation (!!).
217
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-bang-bang
218
+ Enabled: false
219
+ Style/EachWithObject:
220
+ Description: Prefer `each_with_object` over `inject` or `reduce`.
221
+ Enabled: false
222
+ Style/EmptyLiteral:
223
+ Description: Prefer literals to Array.new/Hash.new/String.new.
224
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#literal-array-hash
225
+ Enabled: false
226
+ Style/ModuleFunction:
227
+ Description: Checks for usage of `extend self` in modules.
228
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#module-function
229
+ Enabled: false
230
+ Style/OneLineConditional:
231
+ Description: Favor the ternary operator(?:) over if/then/else/end constructs.
232
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#ternary-operator
233
+ Enabled: false
234
+ Style/PerlBackrefs:
235
+ Description: Avoid Perl-style regex back references.
236
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-perl-regexp-last-matchers
237
+ Enabled: false
238
+ Style/Send:
239
+ Description: Prefer `Object#__send__` or `Object#public_send` to `send`, as `send`
240
+ may overlap with existing methods.
241
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#prefer-public-send
242
+ Enabled: false
243
+ Style/SpecialGlobalVars:
244
+ Description: Avoid Perl-style global variables.
245
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-cryptic-perlisms
246
+ Enabled: false
247
+ Style/VariableInterpolation:
248
+ Description: Don't interpolate global, instance and class variables directly in
249
+ strings.
250
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#curlies-interpolate
251
+ Enabled: false
252
+ Style/WhenThen:
253
+ Description: Use when x then ... for one-line cases.
254
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#one-line-cases
255
+ Enabled: false
256
+ Lint/EachWithObjectArgument:
257
+ Description: Check for immutable argument given to each_with_object.
258
+ Enabled: true
259
+ Lint/HandleExceptions:
260
+ Description: Don't suppress exception.
261
+ StyleGuide: https://github.com/bbatsov/ruby-style-guide#dont-hide-exceptions
262
+ Enabled: false
263
+ Lint/LiteralInInterpolation:
264
+ Description: Checks for literals used in interpolation.
265
+ Enabled: false
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.0
5
+ - 2.5.1
6
+ before_install: gem install bundler -v 1.16.2
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
+
7
+ # Specify your gem's dependencies in url_finder.gemspec
8
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Jacob Burenstam
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # UrlFinder
2
+
3
+ Find URLs in various file formats - supports markdown, HTML, CSV and regular text.
4
+
5
+ - [Usage](#usage)
6
+ - [CLI](#cli)
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'url_finder'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install url_finder
23
+
24
+ ## Usage
25
+
26
+ Find URLs in a file; the format is inferred from the file extension
27
+
28
+ ```ruby
29
+ UrlFinder.from_file('README.md').each do |url|
30
+ puts "Found: #{url}"
31
+ end
32
+ ```
33
+
34
+ You can explicitly pass the format if the file lacks an extension
35
+
36
+ ```ruby
37
+ UrlFinder.from_file('README', 'md').each do |url|
38
+ puts "Found: #{url}"
39
+ end
40
+ ```
41
+
42
+ Supported formats are `markdown` (aliased as `md`), `html`, `csv` and `string` (aliased as `txt`).
43
+
44
+ Find URLs in a string
45
+ ```ruby
46
+ html = '<a href="http://example.com">example.com</a>'
47
+ UrlFinder.from(html, 'html').each do |url|
48
+ puts "Found: #{url}"
49
+ end
50
+ ```
51
+
52
+ ## CLI
53
+
54
+ ```
55
+ Usage: url_finder --help
56
+ --file=/path/to/file Input file
57
+ --format=file_format Input format (html, markdown, csv, string)
58
+ -h, --help How to use
59
+ ```
60
+
61
+ ## Wish list
62
+
63
+ - Better CSV support
64
+ Current support is extremely crude and makes a lot of assumptions about the file.
65
+ - RDoc support
66
+
67
+ ## Development
68
+
69
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
70
+
71
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
72
+
73
+ ## Contributing
74
+
75
+ Bug reports and pull requests are welcome on GitHub at https://github.com/buren/url_finder.
76
+
77
+ ## License
78
+
79
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ task default: :spec
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'url_finder'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/url_finder ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # for dev purposes
5
+ require 'bundler/setup' if ENV['URL_FINDER_GEM_DEV']
6
+
7
+ require 'url_finder'
8
+ require 'optparse'
9
+
10
+ options = {} # collected CLI options, keyed :file and :format
11
+ OptionParser.new do |parser|
12
+ parser.banner = 'Usage: url_finder --help'
13
+ parser.default_argv = ARGV # parse! below is called without arguments and reads this
14
+
15
+ parser.on('--file=/path/to/file', String, 'Input file') do |string|
16
+ options[:file] = string
17
+ end
18
+
19
+ parser.on('--format=file_format', String, 'Input format (html, markdown, csv, string)') do |string|
20
+ options[:format] = string
21
+ end
22
+
23
+ parser.on('-h', '--help', 'How to use') do
24
+ puts parser # prints the banner and the option summary
25
+ exit
26
+ end
27
+ end.parse!
28
+
29
+ file = options.fetch(:file) { raise(ArgumentError, '--file is required') } # --file is mandatory
30
+ file_format = options[:format] # nil when --format was not given
31
+
32
+ UrlFinder.from_file(file, file_format).each do |url|
33
+ puts url # one URL per output line
34
+ end
data/lib/url_finder.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'url_finder/version'
4
+ require 'url_finder/reader'
5
+
6
+ # Gem namespace
7
+ module UrlFinder
8
+ # Find URLs in file
9
+ # @param [String] path to file
10
+ # @param [String] file_format
11
+ # of file if nil file format will tried to be infered from
12
+ # file extension (markdown, html, csv, string)
13
+ def self.from_file(path, file_format = nil)
14
+ file_format ||= path.split('.').last
15
+
16
+ from(File.read(path), file_format)
17
+ end
18
+
19
+ # Find URLs in string
20
+ # @param [String] content string
21
+ # @param [String] file_format of string (markdown, html, csv, string)
22
+ def self.from(content, file_format)
23
+ Reader.new(content, file_format).urls
24
+ end
25
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'url_finder/readers/csv_reader'
4
+ require 'url_finder/readers/html_reader'
5
+ require 'url_finder/readers/markdown_reader'
6
+ require 'url_finder/readers/string_reader'
7
+
8
+ module UrlFinder
9
+ # Handles reader delegation: maps a format name to a reader class
10
+ class Reader
11
+ FORMAT_READERS = { # lowercase format name => reader class
12
+ 'markdown' => MarkdownReader,
13
+ 'md' => MarkdownReader,
14
+ 'html' => HTMLReader,
15
+ 'csv' => CSVReader,
16
+ 'string' => StringReader,
17
+ 'txt' => StringReader,
18
+ }.freeze
19
+
20
+ # The raw content
21
+ attr_reader :content
22
+
23
+ # Instantiates reader with the raw content and a format name (see FORMAT_READERS keys)
24
+ def initialize(content, file_format)
25
+ @content = content
26
+ @file_format = file_format
27
+ end
28
+
29
+ # Returns the file format, normalised with to_s and downcase
30
+ # @return [String] the lowercased file format ('' when nil was given)
31
+ def file_format
32
+ @file_format.to_s.downcase
33
+ end
34
+
35
+ # Returns a reader for the file format; raises ArgumentError if the format is unknown
36
+ # @return [BaseReader] instance of the matching reader class, built from content
37
+ def urls
38
+ reader_klass = FORMAT_READERS.fetch(file_format) do
39
+ raise(ArgumentError, "unknown format #{file_format}")
40
+ end
41
+ reader_klass.new(content)
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UrlFinder
4
+ # Base class for reader implementations; subclasses supply #urls
5
+ class BaseReader
6
+ # Alias for #new
7
+ # @return [BaseReader] new instance of the receiving class
8
+ def self.urls(*args)
9
+ new(*args)
10
+ end
11
+
12
+ include Enumerable
13
+
14
+ attr_reader :content # the string given to #initialize
15
+
16
+ # Initialize reader
17
+ # @param [String] content string to find URLs in
18
+ def initialize(content)
19
+ @content = content
20
+ @urls = nil # not read in this class; NOTE(review): presumably a memo slot for subclasses
21
+ end
22
+
23
+ # Yield each url (delegates to #urls)
24
+ # @see Enumerable#each
25
+ def each(&block)
26
+ urls.each(&block)
27
+ end
28
+
29
+ # @raise [NotImplementedError] raises since this should be implemented in subclass
30
+ def urls
31
+ raise(NotImplementedError, 'subclass must implement!')
32
+ end
33
+
34
+ # Returns true if no URLs were found
35
+ # @return [true, false] true if no URLs were found
36
+ def empty?
37
+ urls.empty?
38
+ end
39
+
40
+ # Returns the URLs as an array (the same object #urls returns, not a copy)
41
+ # @return [Array<String>] the found URLs
42
+ def to_a
43
+ urls
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'url_finder/readers/base_reader'
5
+
6
+ module UrlFinder
7
+ # Find URLs in CSV string
8
+ class CSVReader < BaseReader
9
+ # Returns the found URLs
10
+ # @return [Array<String>] the found URLs
11
+ def urls
12
+ @urls ||= CSV.parse(content).map(&:first).compact
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'url_finder/readers/base_reader'
5
+
6
+ module UrlFinder
7
+ # Find URLs in HTML strings
8
+ class HTMLReader < BaseReader
9
+ # Returns the found URLs
10
+ # @return [Array<String>] the found URLs
11
+ def urls
12
+ document = Nokogiri::HTML(content)
13
+ @urls ||= document.css('a').map { |e| e['href'] }.compact
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'kramdown'
4
+ require 'url_finder/readers/base_reader'
5
+ require 'url_finder/readers/html_reader'
6
+
7
+ module UrlFinder
8
+ # Find URLs in Markdown strings
9
+ class MarkdownReader < BaseReader
10
+ # Returns the found URLs
11
+ # @return [Array<String>] the found URLs
12
+ def urls
13
+ html = Kramdown::Document.new(content).to_html
14
+ @urls ||= HTMLReader.new(html).urls
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'uri'
4
+ require 'url_finder/readers/base_reader'
5
+
6
+ module UrlFinder
7
+ # Find URLs in strings
8
+ class StringReader < BaseReader
9
+ # Returns the found URLs
10
+ # @return [Array<String>] the found URLs
11
+ def urls
12
+ @urls ||= URI.extract(content)
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ module UrlFinder
4
+ # Gem version (url_finder.gemspec assigns this constant to spec.version)
5
+ VERSION = '0.1.0'.freeze
6
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'url_finder/version'
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = 'url_finder'
9
+ spec.version = UrlFinder::VERSION
10
+ spec.authors = ['Jacob Burenstam']
11
+ spec.email = ['burenstam@gmail.com']
12
+
13
+ spec.summary = 'Find URLs in common file formats (Markdown, HTML, CSV, string).'
14
+ spec.description = 'Find URLs in common file formats (Markdown, HTML, CSV, string) with ease - Ruby and CLI.'
15
+ spec.homepage = 'https://github.com/buren/url_finder'
16
+ spec.license = 'MIT'
17
+
18
+ # Files packaged into the released gem: every path printed by `git ls-files -z`
19
+ # (NUL-separated output) except those under test/, spec/ or features/.
20
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
+ end # NOTE(review): tracked dotfiles (e.g. .byebug_history) are packaged too
23
+ spec.bindir = 'exe'
24
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } # basenames of files under exe/
25
+ spec.require_paths = ['lib']
26
+
27
+ # TODO: Consider making kramdown & nokogiri optional
28
+ spec.add_dependency 'kramdown', '~> 1.17'
29
+ spec.add_dependency 'nokogiri', '~> 1.8'
30
+
31
+ spec.add_development_dependency 'bundler', '~> 1.16'
32
+ spec.add_development_dependency 'byebug'
33
+ spec.add_development_dependency 'yard'
34
+ spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'rspec', '~> 3.0'
36
+ end
metadata ADDED
@@ -0,0 +1,166 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: url_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jacob Burenstam
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-08-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: kramdown
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.17'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.17'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.8'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.8'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.16'
55
+ - !ruby/object:Gem::Dependency
56
+ name: byebug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: yard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '10.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '10.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.0'
111
+ description: Find URLs in common file formats (Markdown, HTML, CSV, string) with ease
112
+ - Ruby and CLI.
113
+ email:
114
+ - burenstam@gmail.com
115
+ executables:
116
+ - url_finder
117
+ extensions: []
118
+ extra_rdoc_files: []
119
+ files:
120
+ - ".byebug_history"
121
+ - ".gitignore"
122
+ - ".rspec"
123
+ - ".rubocop.yml"
124
+ - ".ruby-style-guide.yml"
125
+ - ".travis.yml"
126
+ - Gemfile
127
+ - LICENSE.txt
128
+ - README.md
129
+ - Rakefile
130
+ - bin/console
131
+ - bin/setup
132
+ - exe/url_finder
133
+ - lib/url_finder.rb
134
+ - lib/url_finder/reader.rb
135
+ - lib/url_finder/readers/base_reader.rb
136
+ - lib/url_finder/readers/csv_reader.rb
137
+ - lib/url_finder/readers/html_reader.rb
138
+ - lib/url_finder/readers/markdown_reader.rb
139
+ - lib/url_finder/readers/string_reader.rb
140
+ - lib/url_finder/version.rb
141
+ - url_finder.gemspec
142
+ homepage: https://github.com/buren/url_finder
143
+ licenses:
144
+ - MIT
145
+ metadata: {}
146
+ post_install_message:
147
+ rdoc_options: []
148
+ require_paths:
149
+ - lib
150
+ required_ruby_version: !ruby/object:Gem::Requirement
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
155
+ required_rubygems_version: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ requirements: []
161
+ rubyforge_project:
162
+ rubygems_version: 2.7.6
163
+ signing_key:
164
+ specification_version: 4
165
+ summary: Find URLs in common file formats (Markdown, HTML, CSV, string).
166
+ test_files: []