powerdlz23 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Spider/README.md +19 -0
- package/Spider/domain.py +18 -0
- package/Spider/general.py +51 -0
- package/Spider/link_finder.py +25 -0
- package/Spider/main.py +50 -0
- package/Spider/spider.py +74 -0
- package/crawler/.formatter.exs +5 -0
- package/crawler/.github/workflows/ci.yml +29 -0
- package/crawler/.recode.exs +33 -0
- package/crawler/.tool-versions +2 -0
- package/crawler/CHANGELOG.md +82 -0
- package/crawler/README.md +198 -0
- package/crawler/architecture.svg +4 -0
- package/crawler/config/config.exs +9 -0
- package/crawler/config/dev.exs +5 -0
- package/crawler/config/test.exs +5 -0
- package/crawler/examples/google_search/scraper.ex +37 -0
- package/crawler/examples/google_search/url_filter.ex +11 -0
- package/crawler/examples/google_search.ex +77 -0
- package/crawler/lib/crawler/dispatcher/worker.ex +14 -0
- package/crawler/lib/crawler/dispatcher.ex +20 -0
- package/crawler/lib/crawler/fetcher/header_preparer.ex +60 -0
- package/crawler/lib/crawler/fetcher/modifier.ex +45 -0
- package/crawler/lib/crawler/fetcher/policer.ex +77 -0
- package/crawler/lib/crawler/fetcher/recorder.ex +55 -0
- package/crawler/lib/crawler/fetcher/requester.ex +32 -0
- package/crawler/lib/crawler/fetcher/retrier.ex +43 -0
- package/crawler/lib/crawler/fetcher/url_filter.ex +26 -0
- package/crawler/lib/crawler/fetcher.ex +81 -0
- package/crawler/lib/crawler/http.ex +7 -0
- package/crawler/lib/crawler/linker/path_builder.ex +71 -0
- package/crawler/lib/crawler/linker/path_expander.ex +59 -0
- package/crawler/lib/crawler/linker/path_finder.ex +106 -0
- package/crawler/lib/crawler/linker/path_offliner.ex +59 -0
- package/crawler/lib/crawler/linker/path_prefixer.ex +46 -0
- package/crawler/lib/crawler/linker.ex +173 -0
- package/crawler/lib/crawler/options.ex +127 -0
- package/crawler/lib/crawler/parser/css_parser.ex +37 -0
- package/crawler/lib/crawler/parser/guarder.ex +38 -0
- package/crawler/lib/crawler/parser/html_parser.ex +41 -0
- package/crawler/lib/crawler/parser/link_parser/link_expander.ex +32 -0
- package/crawler/lib/crawler/parser/link_parser.ex +50 -0
- package/crawler/lib/crawler/parser.ex +122 -0
- package/crawler/lib/crawler/queue_handler.ex +45 -0
- package/crawler/lib/crawler/scraper.ex +28 -0
- package/crawler/lib/crawler/snapper/dir_maker.ex +45 -0
- package/crawler/lib/crawler/snapper/link_replacer.ex +95 -0
- package/crawler/lib/crawler/snapper.ex +82 -0
- package/crawler/lib/crawler/store/counter.ex +19 -0
- package/crawler/lib/crawler/store/page.ex +7 -0
- package/crawler/lib/crawler/store.ex +87 -0
- package/crawler/lib/crawler/worker.ex +62 -0
- package/crawler/lib/crawler.ex +91 -0
- package/crawler/mix.exs +78 -0
- package/crawler/mix.lock +40 -0
- package/crawler/test/fixtures/introducing-elixir.jpg +0 -0
- package/crawler/test/integration_test.exs +135 -0
- package/crawler/test/lib/crawler/dispatcher/worker_test.exs +7 -0
- package/crawler/test/lib/crawler/dispatcher_test.exs +5 -0
- package/crawler/test/lib/crawler/fetcher/header_preparer_test.exs +7 -0
- package/crawler/test/lib/crawler/fetcher/policer_test.exs +71 -0
- package/crawler/test/lib/crawler/fetcher/recorder_test.exs +9 -0
- package/crawler/test/lib/crawler/fetcher/requester_test.exs +9 -0
- package/crawler/test/lib/crawler/fetcher/retrier_test.exs +7 -0
- package/crawler/test/lib/crawler/fetcher/url_filter_test.exs +7 -0
- package/crawler/test/lib/crawler/fetcher_test.exs +153 -0
- package/crawler/test/lib/crawler/http_test.exs +47 -0
- package/crawler/test/lib/crawler/linker/path_builder_test.exs +7 -0
- package/crawler/test/lib/crawler/linker/path_expander_test.exs +7 -0
- package/crawler/test/lib/crawler/linker/path_finder_test.exs +7 -0
- package/crawler/test/lib/crawler/linker/path_offliner_test.exs +7 -0
- package/crawler/test/lib/crawler/linker/path_prefixer_test.exs +7 -0
- package/crawler/test/lib/crawler/linker_test.exs +7 -0
- package/crawler/test/lib/crawler/options_test.exs +7 -0
- package/crawler/test/lib/crawler/parser/css_parser_test.exs +7 -0
- package/crawler/test/lib/crawler/parser/guarder_test.exs +7 -0
- package/crawler/test/lib/crawler/parser/html_parser_test.exs +7 -0
- package/crawler/test/lib/crawler/parser/link_parser/link_expander_test.exs +7 -0
- package/crawler/test/lib/crawler/parser/link_parser_test.exs +7 -0
- package/crawler/test/lib/crawler/parser_test.exs +8 -0
- package/crawler/test/lib/crawler/queue_handler_test.exs +7 -0
- package/crawler/test/lib/crawler/scraper_test.exs +7 -0
- package/crawler/test/lib/crawler/snapper/dir_maker_test.exs +7 -0
- package/crawler/test/lib/crawler/snapper/link_replacer_test.exs +7 -0
- package/crawler/test/lib/crawler/snapper_test.exs +9 -0
- package/crawler/test/lib/crawler/worker_test.exs +5 -0
- package/crawler/test/lib/crawler_test.exs +295 -0
- package/crawler/test/support/test_case.ex +24 -0
- package/crawler/test/support/test_helpers.ex +28 -0
- package/crawler/test/test_helper.exs +7 -0
- package/package.json +1 -1
- package/pto/CryptoNoter/.gitattributes +2 -0
- package/pto/CryptoNoter/CryptoNight.md +444 -0
- package/pto/CryptoNoter/CryptoNight.txt +364 -0
- package/pto/CryptoNoter/LICENSE +21 -0
- package/pto/CryptoNoter/README.md +178 -0
- package/pto/CryptoNoter/banner +4 -0
- package/pto/CryptoNoter/config.json +8 -0
- package/pto/CryptoNoter/install.sh +60 -0
- package/pto/CryptoNoter/package-lock.json +33 -0
- package/pto/CryptoNoter/package.json +16 -0
- package/pto/CryptoNoter/server.js +225 -0
- package/pto/CryptoNoter/web/demo.html +81 -0
- package/pto/CryptoNoter/web/index.html +1 -0
- package/pto/CryptoNoter/web/lib/cryptonight-asmjs.min.js +16891 -0
- package/pto/CryptoNoter/web/lib/cryptonight-asmjs.min.js.mem +0 -0
- package/pto/CryptoNoter/web/lib/cryptonight.wasm +0 -0
- package/pto/CryptoNoter/web/processor.js +496 -0
- package/pto/CryptoNoter/web/worker.js +5549 -0
- package/pto/crypto/README.md +1 -0
- package/pto/crypto/aes256cbc/README.md +59 -0
- package/pto/crypto/aes256cbc/aes256cbc.go +172 -0
- package/pto/crypto/aes256cbc/aes256cbc_test.go +105 -0
- package/pto/crypto/aes256cbc/examples_test.go +30 -0
- package/pto/crypto/dh64/README.md +84 -0
- package/pto/crypto/dh64/c/dh64.c +75 -0
- package/pto/crypto/dh64/c/dh64.h +12 -0
- package/pto/crypto/dh64/c/dh64_test.c +30 -0
- package/pto/crypto/dh64/csharp/dh64.cs +77 -0
- package/pto/crypto/dh64/csharp/dh64_test.cs +1074 -0
- package/pto/crypto/dh64/go/dh64.go +72 -0
- package/pto/crypto/dh64/go/dh64_test.go +1064 -0
- package/pto/crypto/mt19937/README.md +30 -0
- package/pto/crypto/mt19937/c/mt19937-64.c +180 -0
- package/pto/crypto/mt19937/c/mt19937-64.h +96 -0
- package/pto/crypto/mt19937/c/mt19937-64.out.txt +401 -0
- package/pto/crypto/mt19937/c/mt19937-64test.c +78 -0
- package/pto/crypto/mt19937/csharp/mt19937.cs +139 -0
- package/pto/crypto/mt19937/csharp/mt19937_test.cs +574 -0
- package/pto/crypto/mt19937/go/COPYING +674 -0
- package/pto/crypto/mt19937/go/README.rst +103 -0
- package/pto/crypto/mt19937/go/doc.go +35 -0
- package/pto/crypto/mt19937/go/example.go +32 -0
- package/pto/crypto/mt19937/go/mt19937.go +149 -0
- package/pto/crypto/mt19937/go/mt19937_test.go +614 -0
- package/pto/crypto/rc4/README.md +14 -0
- package/pto/crypto/rc4/csharp/rc4.cs +119 -0
- package/pto/crypto/rc4/csharp/rc4_echo_client.cs +78 -0
- package/pto/crypto/rc4/go/rc4_echo_client.go +102 -0
- package/pto/crypto/rc4/go/rc4_echo_server.go +110 -0
- package/rubyretriever/.rspec +2 -0
- package/rubyretriever/.travis.yml +7 -0
- package/rubyretriever/Gemfile +3 -0
- package/rubyretriever/Gemfile.lock +64 -0
- package/rubyretriever/LICENSE +20 -0
- package/rubyretriever/Rakefile +7 -0
- package/rubyretriever/bin/rr +79 -0
- package/rubyretriever/lib/retriever/cli.rb +25 -0
- package/rubyretriever/lib/retriever/core_ext.rb +13 -0
- package/rubyretriever/lib/retriever/fetch.rb +268 -0
- package/rubyretriever/lib/retriever/fetchfiles.rb +71 -0
- package/rubyretriever/lib/retriever/fetchseo.rb +18 -0
- package/rubyretriever/lib/retriever/fetchsitemap.rb +43 -0
- package/rubyretriever/lib/retriever/link.rb +47 -0
- package/rubyretriever/lib/retriever/openuri_redirect_patch.rb +8 -0
- package/rubyretriever/lib/retriever/page.rb +104 -0
- package/rubyretriever/lib/retriever/page_iterator.rb +21 -0
- package/rubyretriever/lib/retriever/target.rb +47 -0
- package/rubyretriever/lib/retriever/version.rb +4 -0
- package/rubyretriever/lib/retriever.rb +15 -0
- package/rubyretriever/readme.md +166 -0
- package/rubyretriever/rubyretriever.gemspec +41 -0
- package/rubyretriever/spec/link_spec.rb +77 -0
- package/rubyretriever/spec/page_spec.rb +94 -0
- package/rubyretriever/spec/retriever_spec.rb +84 -0
- package/rubyretriever/spec/spec_helper.rb +17 -0
- package/rubyretriever/spec/target_spec.rb +55 -0

package/crawler/test/lib/crawler/fetcher/policer_test.exs
@@ -0,0 +1,71 @@
+defmodule Crawler.Fetcher.PolicerTest do
+  use Crawler.TestCase, async: true
+
+  alias Crawler.Fetcher.Policer
+  alias Crawler.Fetcher.UrlFilter
+  alias Crawler.Store
+
+  @moduletag capture_log: true
+
+  doctest Policer
+
+  setup do
+    Store.ops_reset()
+
+    :ok
+  end
+
+  test "max_pages ok" do
+    Store.ops_inc()
+    Store.ops_inc()
+
+    assert {:ok, %{max_pages: :infinity}} = Policer.police(%{max_pages: :infinity})
+  end
+
+  test "max_pages error" do
+    Store.ops_inc()
+    Store.ops_inc()
+
+    assert {:warn, "Fetch failed check 'within_max_pages?', with opts: " <> _} =
+             Policer.police(%{max_pages: 1})
+  end
+
+  test "max_depths ok" do
+    assert {:ok, %{depth: 1, max_depths: 2}} = Policer.police(%{depth: 1, max_depths: 2})
+  end
+
+  test "max_depths error" do
+    assert {:warn, "Fetch failed check 'within_fetch_depth?', with opts: " <> _} =
+             Policer.police(%{
+               depth: 2,
+               max_depths: 2,
+               html_tag: "a"
+             })
+  end
+
+  test "uri_scheme ok" do
+    assert {:ok,
+            %{
+              html_tag: "img",
+              url: "http://policer/hi.jpg",
+              url_filter: UrlFilter
+            }} =
+             Policer.police(%{
+               html_tag: "img",
+               url: "http://policer/hi.jpg",
+               url_filter: UrlFilter
+             })
+  end
+
+  test "uri_scheme error" do
+    assert {:warn, "Fetch failed check 'acceptable_uri_scheme?', with opts: " <> _} =
+             Policer.police(%{url: "ftp://hello.world"})
+  end
+
+  test "fetched error" do
+    Crawler.Store.add({"http://policer/exist/", nil})
+
+    assert {:warn, "Fetch failed check 'not_fetched_yet?', with opts: " <> _} =
+             Policer.police(%{url: "http://policer/exist/", scope: nil})
+  end
+end

package/crawler/test/lib/crawler/fetcher_test.exs
@@ -0,0 +1,153 @@
+defmodule Crawler.FetcherTest do
+  use Crawler.TestCase, async: true
+
+  alias Crawler.Fetcher
+  alias Crawler.Fetcher.Modifier
+  alias Crawler.Fetcher.Retrier
+  alias Crawler.Fetcher.UrlFilter
+  alias Crawler.Store
+
+  @moduletag capture_log: true
+
+  doctest Fetcher
+
+  defmodule DummyRetrier do
+    @behaviour Retrier.Spec
+
+    def perform(fetch_url, _opts), do: fetch_url.()
+  end
+
+  @defaults %{
+    depth: 0,
+    retries: 2,
+    url_filter: UrlFilter,
+    modifier: Modifier,
+    retrier: DummyRetrier,
+    store: Store,
+    html_tag: "a"
+  }
+
+  test "success", %{bypass: bypass, url: url} do
+    url = "#{url}/fetcher/200"
+
+    Bypass.expect_once(bypass, "GET", "/fetcher/200", fn conn ->
+      Plug.Conn.resp(conn, 200, "<html>200</html>")
+    end)
+
+    @defaults
+    |> Map.merge(%{url: url})
+    |> Fetcher.fetch()
+
+    page = Store.find({url, nil})
+
+    assert page.url == url
+    assert page.body == "<html>200</html>"
+  end
+
+  test "success: 301", %{bypass: bypass, url: url} do
+    Bypass.expect_once(bypass, "GET", "/fetcher/301", fn conn ->
+      conn
+      |> Plug.Conn.merge_resp_headers([{"location", "#{url}/fetcher/301_200"}])
+      |> Plug.Conn.resp(301, "")
+    end)
+
+    Bypass.expect_once(bypass, "GET", "/fetcher/301_200", fn conn ->
+      Plug.Conn.resp(conn, 200, "<html>301_200</html>")
+    end)
+
+    url = "#{url}/fetcher/301"
+
+    @defaults
+    |> Map.merge(%{url: url})
+    |> Fetcher.fetch()
+
+    page = Store.find({url, nil})
+
+    assert page.url == url
+    assert page.body == "<html>301_200</html>"
+  end
+
+  test "failure: 500", %{bypass: bypass, url: url} do
+    url = "#{url}/fetcher/500"
+
+    Bypass.expect_once(bypass, "GET", "/fetcher/500", fn conn ->
+      Plug.Conn.resp(conn, 500, "<html>500</html>")
+    end)
+
+    fetcher =
+      @defaults
+      |> Map.merge(%{url: url})
+      |> Fetcher.fetch()
+
+    assert fetcher == {:warn, "Failed to fetch #{url}, status code: 500"}
+    refute Store.find({url, nil}).body
+  end
+
+  test "failure: timeout", %{bypass: bypass, url: url} do
+    url = "#{url}/fetcher/timeout"
+
+    Bypass.expect_once(bypass, "GET", "/fetcher/timeout", fn conn ->
+      Process.flag(:trap_exit, true)
+      :timer.sleep(100)
+      Plug.Conn.resp(conn, 200, "<html>200</html>")
+    end)
+
+    fetcher =
+      @defaults
+      |> Map.merge(%{url: url, timeout: 50})
+      |> Fetcher.fetch()
+
+    assert fetcher == {:warn, "Failed to fetch #{url}, reason: :timeout"}
+    refute Store.find({url, nil}).body
+  end
+
+  test "failure: retries", %{bypass: bypass, url: url} do
+    url = "#{url}/fetcher/retries"
+
+    Bypass.expect(bypass, "GET", "/fetcher/retries", fn conn ->
+      Plug.Conn.resp(conn, 500, "<html>500</html>")
+    end)
+
+    wait(fn ->
+      fetcher =
+        @defaults
+        |> Map.merge(%{url: url, timeout: 100, retrier: Retrier})
+        |> Fetcher.fetch()
+
+      assert fetcher == {:warn, "Failed to fetch #{url}, status code: 500"}
+      refute Store.find({url, nil}).body
+    end)
+  end
+
+  test "failure: unable to write", %{bypass: bypass, url: url, path: path} do
+    url = "#{url}/fetcher/fail.html"
+
+    Bypass.expect_once(bypass, "GET", "/fetcher/fail.html", fn conn ->
+      Plug.Conn.resp(conn, 200, "<html>200</html>")
+    end)
+
+    fetcher =
+      @defaults
+      |> Map.merge(%{url: url, save_to: "nope"})
+      |> Fetcher.fetch()
+
+    assert {:error, "Cannot write to file nope/#{path}/fetcher/fail.html, reason: enoent"} ==
+             fetcher
+  end
+
+  test "snap /fetcher/page.html", %{bypass: bypass, url: url, path: path} do
+    url = "#{url}/fetcher/page.html"
+
+    Bypass.expect_once(bypass, "GET", "/fetcher/page.html", fn conn ->
+      Plug.Conn.resp(conn, 200, "<html>200</html>")
+    end)
+
+    @defaults
+    |> Map.merge(%{url: url, save_to: tmp("fetcher")})
+    |> Fetcher.fetch()
+
+    wait(fn ->
+      assert {:ok, "<html>200</html>"} == File.read(tmp("fetcher/#{path}/fetcher", "page.html"))
+    end)
+  end
+end

package/crawler/test/lib/crawler/http_test.exs
@@ -0,0 +1,47 @@
+defmodule Crawler.HTTPTest do
+  use Crawler.TestCase, async: true
+
+  alias Crawler.HTTP
+
+  @moduletag capture_log: true
+
+  doctest HTTP
+
+  test "default user agent", %{bypass: bypass, url: url} do
+    Agent.start_link(fn -> "" end, name: HTTP.DefaultUA)
+
+    Bypass.expect_once(bypass, "GET", "/http/default_ua", fn conn ->
+      {_, ua} = Enum.find(conn.req_headers, fn {header, _} -> header == "user-agent" end)
+
+      Agent.update(HTTP.DefaultUA, fn _ -> ua end)
+
+      Plug.Conn.resp(conn, 200, "")
+    end)
+
+    Crawler.crawl("#{url}/http/default_ua")
+
+    wait(fn ->
+      assert String.match?(
+               Agent.get(HTTP.DefaultUA, & &1),
+               ~r{Crawler/\d\.\d\.\d \(https://github\.com/fredwu/crawler\)}
+             )
+    end)
+  end
+
+  test "custom user agent", %{bypass: bypass, url: url} do
+    Agent.start_link(fn -> "" end, name: HTTP.CustomUA)
+
+    Bypass.expect_once(bypass, "GET", "/http/custom_ua", fn conn ->
+      {_, ua} = Enum.find(conn.req_headers, fn {header, _} -> header == "user-agent" end)
+
+      Agent.update(HTTP.CustomUA, fn _ -> ua end)
+
+      Plug.Conn.resp(conn, 200, "")
+    end)
+
+    Crawler.crawl("#{url}/http/custom_ua", user_agent: "Hello World")
+
+    wait(fn ->
+      assert Agent.get(HTTP.CustomUA, & &1) == "Hello World"
+    end)
+  end
+end