url_finder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.byebug_history +3 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.rubocop.yml +6 -0
- data/.ruby-style-guide.yml +265 -0
- data/.travis.yml +6 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +79 -0
- data/Rakefile +8 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/exe/url_finder +34 -0
- data/lib/url_finder.rb +25 -0
- data/lib/url_finder/reader.rb +44 -0
- data/lib/url_finder/readers/base_reader.rb +46 -0
- data/lib/url_finder/readers/csv_reader.rb +15 -0
- data/lib/url_finder/readers/html_reader.rb +16 -0
- data/lib/url_finder/readers/markdown_reader.rb +17 -0
- data/lib/url_finder/readers/string_reader.rb +15 -0
- data/lib/url_finder/version.rb +6 -0
- data/url_finder.gemspec +36 -0
- metadata +166 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7514dce4f92bf06c8d4de7ea64778e43f5edde852e529365c7e55b61353df6cc
|
4
|
+
data.tar.gz: 563a39ca2f31130388a9ed2bedb56417f49196c0e11fdb31f1d83aaf99df6485
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d78fb982fa14ae0006752baa5d82441b09b3a676305405c42cc662794841c39fbb9aa679cb01f42e111d6e467cedb91b5233d568bd360bf005bd77c6580b3cc9
|
7
|
+
data.tar.gz: 805618369adfbb8bf1ab2ae067cb1f19392ee6b30cc58b36d21318924e3d6a8023b0df8f0302b291140c3e6b2bb9fa7f8d38e9f84fc2e89e5f30328202f573ae
|
data/.byebug_history
ADDED
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,265 @@
|
|
1
|
+
Rails:
|
2
|
+
Enabled: false
|
3
|
+
AllCops:
|
4
|
+
TargetRubyVersion: 2.3
|
5
|
+
Exclude:
|
6
|
+
- "vendor/**/*"
|
7
|
+
UseCache: true
|
8
|
+
Style/CollectionMethods:
|
9
|
+
Description: Preferred collection methods.
|
10
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#map-find-select-reduce-size
|
11
|
+
Enabled: true
|
12
|
+
PreferredMethods:
|
13
|
+
collect: map
|
14
|
+
collect!: map!
|
15
|
+
find: detect
|
16
|
+
find_all: select
|
17
|
+
reduce: inject
|
18
|
+
Style/RedundantFreeze:
|
19
|
+
Description: "Checks usages of Object#freeze on immutable objects."
|
20
|
+
Enabled: false
|
21
|
+
Layout/DotPosition:
|
22
|
+
Description: Checks the position of the dot in multi-line method calls.
|
23
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#consistent-multi-line-chains
|
24
|
+
Enabled: true
|
25
|
+
EnforcedStyle: trailing
|
26
|
+
SupportedStyles:
|
27
|
+
- leading
|
28
|
+
- trailing
|
29
|
+
Naming/FileName:
|
30
|
+
Description: Use snake_case for source file names.
|
31
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#snake-case-files
|
32
|
+
Enabled: false
|
33
|
+
Exclude: []
|
34
|
+
Naming/MemoizedInstanceVariableName:
|
35
|
+
Description: Memoized method name should match memo instance variable name.
|
36
|
+
Enabled: false
|
37
|
+
Naming/UncommunicativeMethodParamName:
|
38
|
+
Description: >-
|
39
|
+
Checks for method parameter names that contain capital letters,
|
40
|
+
end in numbers, or do not meet a minimal length.
|
41
|
+
Enabled: false
|
42
|
+
Style/GuardClause:
|
43
|
+
Description: Check for conditionals that can be replaced with guard clauses
|
44
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-nested-conditionals
|
45
|
+
Enabled: true
|
46
|
+
MinBodyLength: 3
|
47
|
+
Style/IfUnlessModifier:
|
48
|
+
Description: Favor modifier if/unless usage when you have a single-line body.
|
49
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#if-as-a-modifier
|
50
|
+
Enabled: false
|
51
|
+
Style/OptionHash:
|
52
|
+
Description: Don't use option hashes when you can use keyword arguments.
|
53
|
+
Enabled: false
|
54
|
+
Style/PercentLiteralDelimiters:
|
55
|
+
Description: Use `%`-literal delimiters consistently
|
56
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#percent-literal-braces
|
57
|
+
Enabled: false
|
58
|
+
PreferredDelimiters:
|
59
|
+
"%": "()"
|
60
|
+
"%i": "()"
|
61
|
+
"%q": "()"
|
62
|
+
"%Q": "()"
|
63
|
+
"%r": "{}"
|
64
|
+
"%s": "()"
|
65
|
+
"%w": "()"
|
66
|
+
"%W": "()"
|
67
|
+
"%x": "()"
|
68
|
+
Naming/PredicateName:
|
69
|
+
Description: Check the names of predicate methods.
|
70
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#bool-methods-qmark
|
71
|
+
Enabled: true
|
72
|
+
NamePrefix:
|
73
|
+
- is_
|
74
|
+
- has_
|
75
|
+
- have_
|
76
|
+
NamePrefixBlacklist:
|
77
|
+
- is_
|
78
|
+
Exclude:
|
79
|
+
- spec/**/*
|
80
|
+
Style/RaiseArgs:
|
81
|
+
Description: Checks the arguments passed to raise/fail.
|
82
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#exception-class-messages
|
83
|
+
Enabled: false
|
84
|
+
EnforcedStyle: exploded
|
85
|
+
SupportedStyles:
|
86
|
+
- compact
|
87
|
+
- exploded
|
88
|
+
Style/SignalException:
|
89
|
+
Description: Checks for proper usage of fail and raise.
|
90
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#fail-method
|
91
|
+
Enabled: false
|
92
|
+
EnforcedStyle: semantic
|
93
|
+
SupportedStyles:
|
94
|
+
- only_raise
|
95
|
+
- only_fail
|
96
|
+
- semantic
|
97
|
+
Style/SingleLineBlockParams:
|
98
|
+
Description: Enforces the names of some block params.
|
99
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#reduce-blocks
|
100
|
+
Enabled: false
|
101
|
+
Methods:
|
102
|
+
- reduce:
|
103
|
+
- a
|
104
|
+
- e
|
105
|
+
- inject:
|
106
|
+
- a
|
107
|
+
- e
|
108
|
+
Style/TrivialAccessors:
|
109
|
+
Enabled: false
|
110
|
+
Style/SingleLineMethods:
|
111
|
+
Description: Avoid single-line methods.
|
112
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-single-line-methods
|
113
|
+
Enabled: false
|
114
|
+
AllowIfMethodIsEmpty: true
|
115
|
+
Style/StringLiterals:
|
116
|
+
Description: Checks if uses of quotes match the configured preference.
|
117
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#consistent-string-literals
|
118
|
+
Enabled: true
|
119
|
+
EnforcedStyle: single_quotes
|
120
|
+
SupportedStyles:
|
121
|
+
- single_quotes
|
122
|
+
- double_quotes
|
123
|
+
Style/MixinUsage:
|
124
|
+
Enabled: true
|
125
|
+
Exclude:
|
126
|
+
- exe/*
|
127
|
+
Style/StringLiteralsInInterpolation:
|
128
|
+
Description: Checks if uses of quotes inside expressions in interpolated strings
|
129
|
+
match the configured preference.
|
130
|
+
Enabled: true
|
131
|
+
EnforcedStyle: single_quotes
|
132
|
+
SupportedStyles:
|
133
|
+
- single_quotes
|
134
|
+
- double_quotes
|
135
|
+
Style/TrailingCommaInArrayLiteral:
|
136
|
+
Description: Checks for trailing comma in parameter lists and literals.
|
137
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas
|
138
|
+
Enabled: true
|
139
|
+
EnforcedStyleForMultiline: comma
|
140
|
+
Style/TrailingCommaInHashLiteral:
|
141
|
+
Description: Checks for trailing comma in parameter lists and literals.
|
142
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-trailing-array-commas
|
143
|
+
Enabled: true
|
144
|
+
EnforcedStyleForMultiline: comma
|
145
|
+
Metrics/AbcSize:
|
146
|
+
Description: A calculated magnitude based on number of assignments, branches, and
|
147
|
+
conditions.
|
148
|
+
Enabled: false
|
149
|
+
Max: 15
|
150
|
+
Metrics/ClassLength:
|
151
|
+
Description: Avoid classes longer than 100 lines of code.
|
152
|
+
Enabled: false
|
153
|
+
CountComments: false
|
154
|
+
Max: 100
|
155
|
+
Metrics/ModuleLength:
|
156
|
+
CountComments: false
|
157
|
+
Max: 100
|
158
|
+
Description: Avoid modules longer than 100 lines of code.
|
159
|
+
Enabled: false
|
160
|
+
Metrics/CyclomaticComplexity:
|
161
|
+
Description: A complexity metric that is strongly correlated to the number of test
|
162
|
+
cases needed to validate a method.
|
163
|
+
Enabled: false
|
164
|
+
Max: 6
|
165
|
+
Metrics/MethodLength:
|
166
|
+
Description: Avoid methods longer than 10 lines of code.
|
167
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#short-methods
|
168
|
+
Enabled: false
|
169
|
+
CountComments: false
|
170
|
+
Max: 10
|
171
|
+
Metrics/ParameterLists:
|
172
|
+
Description: Avoid parameter lists longer than three or four parameters.
|
173
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#too-many-params
|
174
|
+
Enabled: false
|
175
|
+
Max: 5
|
176
|
+
CountKeywordArgs: true
|
177
|
+
Metrics/PerceivedComplexity:
|
178
|
+
Description: A complexity metric geared towards measuring complexity for a human
|
179
|
+
reader.
|
180
|
+
Enabled: false
|
181
|
+
Max: 7
|
182
|
+
Metrics/LineLength:
|
183
|
+
Description: Maximum line length
|
184
|
+
Enabled: true
|
185
|
+
Max: 95
|
186
|
+
Exclude:
|
187
|
+
- exe/url_finder
|
188
|
+
- lib/url_finder/cli.rb
|
189
|
+
- Gemfile
|
190
|
+
- url_finder.gemspec
|
191
|
+
- spec/**/*
|
192
|
+
Metrics/BlockLength:
|
193
|
+
Enabled: true
|
194
|
+
Exclude:
|
195
|
+
- lib/url_finder/cli/*
|
196
|
+
- spec/**/*
|
197
|
+
Lint/AssignmentInCondition:
|
198
|
+
Description: Don't use assignment in conditions.
|
199
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#safe-assignment-in-condition
|
200
|
+
Enabled: false
|
201
|
+
AllowSafeAssignment: true
|
202
|
+
Style/InlineComment:
|
203
|
+
Description: Avoid inline comments.
|
204
|
+
Enabled: false
|
205
|
+
Naming/AccessorMethodName:
|
206
|
+
Description: Check the naming of accessor methods for get_/set_.
|
207
|
+
Enabled: false
|
208
|
+
Style/Alias:
|
209
|
+
Description: Use alias_method instead of alias.
|
210
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#alias-method
|
211
|
+
Enabled: false
|
212
|
+
Style/Documentation:
|
213
|
+
Description: Document classes and non-namespace modules.
|
214
|
+
Enabled: false
|
215
|
+
Style/DoubleNegation:
|
216
|
+
Description: Checks for uses of double negation (!!).
|
217
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-bang-bang
|
218
|
+
Enabled: false
|
219
|
+
Style/EachWithObject:
|
220
|
+
Description: Prefer `each_with_object` over `inject` or `reduce`.
|
221
|
+
Enabled: false
|
222
|
+
Style/EmptyLiteral:
|
223
|
+
Description: Prefer literals to Array.new/Hash.new/String.new.
|
224
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#literal-array-hash
|
225
|
+
Enabled: false
|
226
|
+
Style/ModuleFunction:
|
227
|
+
Description: Checks for usage of `extend self` in modules.
|
228
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#module-function
|
229
|
+
Enabled: false
|
230
|
+
Style/OneLineConditional:
|
231
|
+
Description: Favor the ternary operator(?:) over if/then/else/end constructs.
|
232
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#ternary-operator
|
233
|
+
Enabled: false
|
234
|
+
Style/PerlBackrefs:
|
235
|
+
Description: Avoid Perl-style regex back references.
|
236
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-perl-regexp-last-matchers
|
237
|
+
Enabled: false
|
238
|
+
Style/Send:
|
239
|
+
Description: Prefer `Object#__send__` or `Object#public_send` to `send`, as `send`
|
240
|
+
may overlap with existing methods.
|
241
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#prefer-public-send
|
242
|
+
Enabled: false
|
243
|
+
Style/SpecialGlobalVars:
|
244
|
+
Description: Avoid Perl-style global variables.
|
245
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#no-cryptic-perlisms
|
246
|
+
Enabled: false
|
247
|
+
Style/VariableInterpolation:
|
248
|
+
Description: Don't interpolate global, instance and class variables directly in
|
249
|
+
strings.
|
250
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#curlies-interpolate
|
251
|
+
Enabled: false
|
252
|
+
Style/WhenThen:
|
253
|
+
Description: Use when x then ... for one-line cases.
|
254
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#one-line-cases
|
255
|
+
Enabled: false
|
256
|
+
Lint/EachWithObjectArgument:
|
257
|
+
Description: Check for immutable argument given to each_with_object.
|
258
|
+
Enabled: true
|
259
|
+
Lint/HandleExceptions:
|
260
|
+
Description: Don't suppress exception.
|
261
|
+
StyleGuide: https://github.com/bbatsov/ruby-style-guide#dont-hide-exceptions
|
262
|
+
Enabled: false
|
263
|
+
Lint/LiteralInInterpolation:
|
264
|
+
Description: Checks for literals used in interpolation.
|
265
|
+
Enabled: false
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Jacob Burenstam
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# UrlFinder
|
2
|
+
|
3
|
+
Find URLs in various file formats - supports markdown, HTML, CSV and regular text.
|
4
|
+
|
5
|
+
- [Usage](#usage)
|
6
|
+
- [CLI](#cli)
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'url_finder'
|
14
|
+
```
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
$ bundle
|
19
|
+
|
20
|
+
Or install it yourself as:
|
21
|
+
|
22
|
+
$ gem install url_finder
|
23
|
+
|
24
|
+
## Usage
|
25
|
+
|
26
|
+
Find URLs in a file; the format is inferred from the file extension
|
27
|
+
|
28
|
+
```ruby
|
29
|
+
UrlFinder.from_file('README.md').each do |url|
|
30
|
+
puts "Found: #{url}"
|
31
|
+
end
|
32
|
+
```
|
33
|
+
|
34
|
+
You can explicitly pass the format if the file lacks an extension
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
UrlFinder.from_file('README', 'md').each do |url|
|
38
|
+
puts "Found: #{url}"
|
39
|
+
end
|
40
|
+
```
|
41
|
+
|
42
|
+
Supported formats are `markdown` (aliased as `md`), `html`, `csv` and `string` (aliased as `txt`).
|
43
|
+
|
44
|
+
Find URLs in a string
|
45
|
+
```ruby
|
46
|
+
html = '<a href="http://example.com">example.com</a>'
|
47
|
+
UrlFinder.from(html, 'html').each do |url|
|
48
|
+
puts "Found: #{url}"
|
49
|
+
end
|
50
|
+
```
|
51
|
+
|
52
|
+
## CLI
|
53
|
+
|
54
|
+
```
|
55
|
+
Usage: url_finder --help
|
56
|
+
--file=/path/to/file Input file
|
57
|
+
--format=file_format Input format (html, markdown, csv, string)
|
58
|
+
-h, --help How to use
|
59
|
+
```
|
60
|
+
|
61
|
+
## Wish list
|
62
|
+
|
63
|
+
- Better CSV support
|
64
|
+
+ Current support is extremely crude and makes a lot of assumptions about the file (only the first column of each row is read).
|
65
|
+
- RDoc support
|
66
|
+
|
67
|
+
## Development
|
68
|
+
|
69
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
70
|
+
|
71
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
72
|
+
|
73
|
+
## Contributing
|
74
|
+
|
75
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/buren/url_finder.
|
76
|
+
|
77
|
+
## License
|
78
|
+
|
79
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'url_finder'
|
6
|
+
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
9
|
+
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
11
|
+
# require "pry"
|
12
|
+
# Pry.start
|
13
|
+
|
14
|
+
require 'irb'
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/exe/url_finder
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
# for dev purposes
|
5
|
+
require 'bundler/setup' if ENV['URL_FINDER_GEM_DEV']
|
6
|
+
|
7
|
+
require 'url_finder'
|
8
|
+
require 'optparse'
|
9
|
+
|
10
|
+
# Collected command line options (:file and :format)
options = {}

option_parser = OptionParser.new do |parser|
  parser.banner = 'Usage: url_finder --help'
  parser.default_argv = ARGV

  parser.on('--file=/path/to/file', String, 'Input file') { |value| options[:file] = value }

  parser.on('--format=file_format', String, 'Input format (html, markdown, csv, string)') { |value| options[:format] = value }

  # Print the usage text and stop without reading any file
  parser.on('-h', '--help', 'How to use') do
    puts parser
    exit
  end
end
option_parser.parse!

# The input file is mandatory, the format is optional (nil lets
# UrlFinder.from_file infer it from the file extension)
raise(ArgumentError, '--file is required') unless options.key?(:file)

# Print every found URL on its own line
UrlFinder.from_file(options[:file], options[:format]).each { |url| puts url }
|
data/lib/url_finder.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'url_finder/version'
|
4
|
+
require 'url_finder/reader'
|
5
|
+
|
6
|
+
# Gem namespace
|
7
|
+
module UrlFinder
  # Find URLs in a file.
  #
  # @param [String, Pathname] path to the file
  # @param [String, nil] file_format
  #   format of the file (markdown, html, csv, string); when nil the format
  #   is inferred from the file extension
  # @return [BaseReader] reader yielding the found URLs
  # @raise [ArgumentError] if the (given or inferred) format is unknown
  def self.from_file(path, file_format = nil)
    # File.extname only looks at the last path component, so a dot in a
    # directory name (e.g. "docs.v2/README") is not mistaken for an
    # extension. It also accepts Pathname objects. A file without an
    # extension yields an empty format, which is rejected as unknown.
    file_format ||= File.extname(path).sub(/\A\./, '')

    from(File.read(path), file_format)
  end

  # Find URLs in a string.
  #
  # @param [String] content string
  # @param [String] file_format of string (markdown, html, csv, string)
  # @return [BaseReader] reader yielding the found URLs
  # @raise [ArgumentError] if the format is unknown
  def self.from(content, file_format)
    Reader.new(content, file_format).urls
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'url_finder/readers/csv_reader'
|
4
|
+
require 'url_finder/readers/html_reader'
|
5
|
+
require 'url_finder/readers/markdown_reader'
|
6
|
+
require 'url_finder/readers/string_reader'
|
7
|
+
|
8
|
+
module UrlFinder
  # Handles reader delegation: picks the reader class for a file format
  class Reader
    # Maps every accepted format name to the reader class handling it
    FORMAT_READERS = {
      'markdown' => MarkdownReader,
      'md' => MarkdownReader,
      'html' => HTMLReader,
      'csv' => CSVReader,
      'string' => StringReader,
      'txt' => StringReader,
    }.freeze

    # @return [String] the raw content
    attr_reader :content

    # Instantiates the reader
    # @param [String] content to find URLs in
    # @param [#to_s] file_format name of the content format
    def initialize(content, file_format)
      @file_format = file_format
      @content = content
    end

    # Returns the file format, normalized to a lowercase string
    # @return [String] the file format
    def file_format
      raw_format = @file_format.to_s
      raw_format.downcase
    end

    # Builds the reader matching the file format
    # @return [BaseReader] instance of a base reader subclass
    # @raise [ArgumentError] if no reader is registered for the format
    def urls
      format_name = file_format
      reader_klass = FORMAT_READERS[format_name]
      raise(ArgumentError, "unknown format #{format_name}") if reader_klass.nil?

      reader_klass.new(content)
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module UrlFinder
  # Abstract reader: subclasses supply #urls, this class wraps it with
  # Enumerable and a few convenience methods
  class BaseReader
    include Enumerable

    class << self
      # Convenience constructor, forwards all arguments to .new
      # @return [BaseReader] instance of BaseReader
      def urls(*args)
        new(*args)
      end
    end

    # @return [String] the string URLs are searched in
    attr_reader :content

    # Initialize reader
    # @param [String] content string to find URLs in
    def initialize(content)
      @urls = nil
      @content = content
    end

    # Hook for subclasses, which must return the found URLs
    # @raise [NotImplementedError] always, since subclasses must override it
    def urls
      raise(NotImplementedError, 'subclass must implement!')
    end

    # Passes the given block on to the found URLs
    # @see Enumerable#each
    def each(&block)
      urls.each(&block)
    end

    # Returns the URLs as an array
    # @return [Array<String>] the found URLs
    def to_a
      urls
    end

    # Tells whether the search came up empty
    # @return [true, false] true if no URLs were found
    def empty?
      urls.empty?
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'url_finder/readers/base_reader'
|
5
|
+
|
6
|
+
module UrlFinder
  # Reader for CSV content
  class CSVReader < BaseReader
    # Takes the first cell of every parsed row and drops nil values.
    # The result is memoized.
    # @return [Array<String>] the found URLs
    def urls
      @urls ||= begin
        rows = CSV.parse(content)
        rows.map { |row| row.first }.compact
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'url_finder/readers/base_reader'
|
5
|
+
|
6
|
+
module UrlFinder
  # Find URLs in HTML strings
  class HTMLReader < BaseReader
    # Returns the href attribute of every <a> element in the content;
    # anchors without an href are skipped.
    #
    # The document is parsed inside the memoization so repeated calls
    # (e.g. #empty? followed by #each) do not re-parse the HTML.
    # @return [Array<String>] the found URLs
    def urls
      @urls ||= Nokogiri::HTML(content).css('a').map { |e| e['href'] }.compact
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'kramdown'
|
4
|
+
require 'url_finder/readers/base_reader'
|
5
|
+
require 'url_finder/readers/html_reader'
|
6
|
+
|
7
|
+
module UrlFinder
  # Find URLs in Markdown strings
  class MarkdownReader < BaseReader
    # Converts the Markdown content to HTML with Kramdown and returns the
    # URLs HTMLReader finds in that HTML.
    #
    # The conversion happens inside the memoization so repeated calls
    # (e.g. #empty? followed by #each) do not convert the document again.
    # @return [Array<String>] the found URLs
    def urls
      @urls ||= begin
        html = Kramdown::Document.new(content).to_html
        HTMLReader.new(html).urls
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'uri'
|
4
|
+
require 'url_finder/readers/base_reader'
|
5
|
+
|
6
|
+
module UrlFinder
  # Reader for plain strings
  class StringReader < BaseReader
    # Extracts the URLs from the content with URI.extract and memoizes
    # the result.
    # NOTE(review): Ruby's URI documentation marks URI.extract as
    # obsolete - confirm before upgrading Ruby.
    # @return [Array<String>] the found URLs
    def urls
      return @urls if @urls

      @urls = URI.extract(content)
    end
  end
end
|
data/url_finder.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'url_finder/version'
|
6
|
+
|
7
|
+
Gem::Specification.new do |spec|
  spec.name          = 'url_finder'
  spec.version       = UrlFinder::VERSION
  spec.authors       = ['Jacob Burenstam']
  spec.email         = ['burenstam@gmail.com']

  spec.summary       = 'Find URLs in common file formats (Markdown, HTML, CSV, string).'
  spec.description   = 'Find URLs in common file formats (Markdown, HTML, CSV, string) with ease - Ruby and CLI.'
  spec.homepage      = 'https://github.com/buren/url_finder'
  spec.license       = 'MIT'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # Test directories and the local debugger history (.byebug_history) are
  # left out, they have no place in the released gem.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      f.match(%r{^(test|spec|features)/}) || f == '.byebug_history'
    end
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # TODO: Consider making kramdown & nokogiri optional
  spec.add_dependency 'kramdown', '~> 1.17'
  spec.add_dependency 'nokogiri', '~> 1.8'

  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'byebug'
  spec.add_development_dependency 'yard'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
|
metadata
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: url_finder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jacob Burenstam
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-08-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: kramdown
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.17'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.17'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.8'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.16'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.16'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: byebug
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: yard
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '10.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '10.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '3.0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '3.0'
|
111
|
+
description: Find URLs in common file formats (Markdown, HTML, CSV, string) with ease
|
112
|
+
- Ruby and CLI.
|
113
|
+
email:
|
114
|
+
- burenstam@gmail.com
|
115
|
+
executables:
|
116
|
+
- url_finder
|
117
|
+
extensions: []
|
118
|
+
extra_rdoc_files: []
|
119
|
+
files:
|
120
|
+
- ".byebug_history"
|
121
|
+
- ".gitignore"
|
122
|
+
- ".rspec"
|
123
|
+
- ".rubocop.yml"
|
124
|
+
- ".ruby-style-guide.yml"
|
125
|
+
- ".travis.yml"
|
126
|
+
- Gemfile
|
127
|
+
- LICENSE.txt
|
128
|
+
- README.md
|
129
|
+
- Rakefile
|
130
|
+
- bin/console
|
131
|
+
- bin/setup
|
132
|
+
- exe/url_finder
|
133
|
+
- lib/url_finder.rb
|
134
|
+
- lib/url_finder/reader.rb
|
135
|
+
- lib/url_finder/readers/base_reader.rb
|
136
|
+
- lib/url_finder/readers/csv_reader.rb
|
137
|
+
- lib/url_finder/readers/html_reader.rb
|
138
|
+
- lib/url_finder/readers/markdown_reader.rb
|
139
|
+
- lib/url_finder/readers/string_reader.rb
|
140
|
+
- lib/url_finder/version.rb
|
141
|
+
- url_finder.gemspec
|
142
|
+
homepage: https://github.com/buren/url_finder
|
143
|
+
licenses:
|
144
|
+
- MIT
|
145
|
+
metadata: {}
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options: []
|
148
|
+
require_paths:
|
149
|
+
- lib
|
150
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - ">="
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
requirements: []
|
161
|
+
rubyforge_project:
|
162
|
+
rubygems_version: 2.7.6
|
163
|
+
signing_key:
|
164
|
+
specification_version: 4
|
165
|
+
summary: Find URLs in common file formats (Markdown, HTML, CSV, string).
|
166
|
+
test_files: []
|