curl_impersonate 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,155 @@
1
+ #ifndef CURLINC_URLAPI_H
2
+ #define CURLINC_URLAPI_H
3
+ /***************************************************************************
4
+ * _ _ ____ _
5
+ * Project ___| | | | _ \| |
6
+ * / __| | | | |_) | |
7
+ * | (__| |_| | _ <| |___
8
+ * \___|\___/|_| \_\_____|
9
+ *
10
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
11
+ *
12
+ * This software is licensed as described in the file COPYING, which
13
+ * you should have received as part of this distribution. The terms
14
+ * are also available at https://curl.se/docs/copyright.html.
15
+ *
16
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
17
+ * copies of the Software, and permit persons to whom the Software is
18
+ * furnished to do so, under the terms of the COPYING file.
19
+ *
20
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21
+ * KIND, either express or implied.
22
+ *
23
+ * SPDX-License-Identifier: curl
24
+ *
25
+ ***************************************************************************/
26
+
27
+ #include "curl.h"
28
+
29
+ #ifdef __cplusplus
30
+ extern "C" {
31
+ #endif
32
+
33
/*
 * Error codes of the URL API. Values are consecutive starting at zero;
 * each enumerator is spelled out with its numeric value.
 */
typedef enum {
  CURLUE_OK = 0,
  CURLUE_BAD_HANDLE = 1,
  CURLUE_BAD_PARTPOINTER = 2,
  CURLUE_MALFORMED_INPUT = 3,
  CURLUE_BAD_PORT_NUMBER = 4,
  CURLUE_UNSUPPORTED_SCHEME = 5,
  CURLUE_URLDECODE = 6,
  CURLUE_OUT_OF_MEMORY = 7,
  CURLUE_USER_NOT_ALLOWED = 8,
  CURLUE_UNKNOWN_PART = 9,
  CURLUE_NO_SCHEME = 10,
  CURLUE_NO_USER = 11,
  CURLUE_NO_PASSWORD = 12,
  CURLUE_NO_OPTIONS = 13,
  CURLUE_NO_HOST = 14,
  CURLUE_NO_PORT = 15,
  CURLUE_NO_QUERY = 16,
  CURLUE_NO_FRAGMENT = 17,
  CURLUE_NO_ZONEID = 18,
  CURLUE_BAD_FILE_URL = 19,
  CURLUE_BAD_FRAGMENT = 20,
  CURLUE_BAD_HOSTNAME = 21,
  CURLUE_BAD_IPV6 = 22,
  CURLUE_BAD_LOGIN = 23,
  CURLUE_BAD_PASSWORD = 24,
  CURLUE_BAD_PATH = 25,
  CURLUE_BAD_QUERY = 26,
  CURLUE_BAD_SCHEME = 27,
  CURLUE_BAD_SLASHES = 28,
  CURLUE_BAD_USER = 29,
  CURLUE_LACKS_IDN = 30,
  CURLUE_TOO_LARGE = 31,
  CURLUE_LAST /* always the final enumerator: 32 */
} CURLUcode;
69
+
70
/*
 * Selects which URL component is addressed; this is the type of the 'what'
 * argument of curl_url_get() and curl_url_set().
 */
typedef enum {
  CURLUPART_URL = 0,
  CURLUPART_SCHEME = 1,
  CURLUPART_USER = 2,
  CURLUPART_PASSWORD = 3,
  CURLUPART_OPTIONS = 4,
  CURLUPART_HOST = 5,
  CURLUPART_PORT = 6,
  CURLUPART_PATH = 7,
  CURLUPART_QUERY = 8,
  CURLUPART_FRAGMENT = 9,
  CURLUPART_ZONEID = 10 /* added in 7.65.0 */
} CURLUPart;
83
+
84
/* CURLU_* bit flags. Every flag occupies its own bit (bits 0 through 15). */

/* return default port number */
#define CURLU_DEFAULT_PORT (1 << 0)

/* act as if no port number was set, if the port number matches the default
   for the scheme */
#define CURLU_NO_DEFAULT_PORT (1 << 1)

/* return default scheme if missing */
#define CURLU_DEFAULT_SCHEME (1 << 2)

/* allow non-supported scheme */
#define CURLU_NON_SUPPORT_SCHEME (1 << 3)

/* leave dot sequences */
#define CURLU_PATH_AS_IS (1 << 4)

/* no user+password allowed */
#define CURLU_DISALLOW_USER (1 << 5)

/* URL decode on get */
#define CURLU_URLDECODE (1 << 6)

/* URL encode on set */
#define CURLU_URLENCODE (1 << 7)

/* append a form style part */
#define CURLU_APPENDQUERY (1 << 8)

/* legacy curl-style guessing */
#define CURLU_GUESS_SCHEME (1 << 9)

/* Allow empty authority when the scheme is unknown. */
#define CURLU_NO_AUTHORITY (1 << 10)

/* Allow spaces in the URL */
#define CURLU_ALLOW_SPACE (1 << 11)

/* get the hostname in punycode */
#define CURLU_PUNYCODE (1 << 12)

/* punycode => IDN conversion */
#define CURLU_PUNY2IDN (1 << 13)

/* allow empty queries and fragments when extracting the URL or the
   components */
#define CURLU_GET_EMPTY (1 << 14)

/* for get, do not accept a guess */
#define CURLU_NO_GUESS_SCHEME (1 << 15)
106
+
107
/* URL handle type. struct Curl_URL is only declared in this header, never
   defined, so the handle is used through pointers. */
struct Curl_URL;
typedef struct Curl_URL CURLU;
108
+
109
+ /*
110
+ * curl_url() creates a new CURLU handle and returns a pointer to it.
111
+ * Must be freed with curl_url_cleanup().
112
+ */
113
+ CURL_EXTERN CURLU *curl_url(void);
114
+
115
+ /*
116
+ * curl_url_cleanup() frees the CURLU handle and related resources used for
117
+ * the URL parsing. It will not free strings previously returned with the URL
118
+ * API.
119
+ */
120
+ CURL_EXTERN void curl_url_cleanup(CURLU *handle);
121
+
122
+ /*
123
+ * curl_url_dup() duplicates a CURLU handle and returns a new copy. The new
124
+ * handle must also be freed with curl_url_cleanup().
125
+ */
126
+ CURL_EXTERN CURLU *curl_url_dup(const CURLU *in);
127
+
128
+ /*
129
+ * curl_url_get() extracts a specific part of the URL from a CURLU
130
+ * handle. Returns error code. The returned pointer MUST be freed with
131
+ * curl_free() afterwards.
132
+ */
133
+ CURL_EXTERN CURLUcode curl_url_get(const CURLU *handle, CURLUPart what,
134
+ char **part, unsigned int flags);
135
+
136
+ /*
137
+ * curl_url_set() sets a specific part of the URL in a CURLU handle. Returns
138
+ * error code. The passed in string will be copied. Passing a NULL instead of
139
+ * a part string, clears that part.
140
+ */
141
+ CURL_EXTERN CURLUcode curl_url_set(CURLU *handle, CURLUPart what,
142
+ const char *part, unsigned int flags);
143
+
144
+ /*
145
+ * curl_url_strerror() turns a CURLUcode value into the equivalent human
146
+ * readable error string. This is useful for printing meaningful error
147
+ * messages.
148
+ */
149
+ CURL_EXTERN const char *curl_url_strerror(CURLUcode);
150
+
151
+ #ifdef __cplusplus
152
+ } /* end of extern "C" */
153
+ #endif
154
+
155
+ #endif /* CURLINC_URLAPI_H */
@@ -0,0 +1,85 @@
1
+ #ifndef CURLINC_WEBSOCKETS_H
2
+ #define CURLINC_WEBSOCKETS_H
3
+ /***************************************************************************
4
+ * _ _ ____ _
5
+ * Project ___| | | | _ \| |
6
+ * / __| | | | |_) | |
7
+ * | (__| |_| | _ <| |___
8
+ * \___|\___/|_| \_\_____|
9
+ *
10
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
11
+ *
12
+ * This software is licensed as described in the file COPYING, which
13
+ * you should have received as part of this distribution. The terms
14
+ * are also available at https://curl.se/docs/copyright.html.
15
+ *
16
+ * You may opt to use, copy, modify, merge, publish, distribute and/or sell
17
+ * copies of the Software, and permit persons to whom the Software is
18
+ * furnished to do so, under the terms of the COPYING file.
19
+ *
20
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
21
+ * KIND, either express or implied.
22
+ *
23
+ * SPDX-License-Identifier: curl
24
+ *
25
+ ***************************************************************************/
26
+
27
+ #ifdef __cplusplus
28
+ extern "C" {
29
+ #endif
30
+
31
+ struct curl_ws_frame {
32
+ int age; /* zero */
33
+ int flags; /* See the CURLWS_* defines */
34
+ curl_off_t offset; /* the offset of this data into the frame */
35
+ curl_off_t bytesleft; /* number of pending bytes left of the payload */
36
+ size_t len; /* size of the current data chunk */
37
+ };
38
+
39
/* Websocket flag bits, one bit per flag (bits 0 through 5). */
#define CURLWS_TEXT   (1 << 0)
#define CURLWS_BINARY (1 << 1)
#define CURLWS_CONT   (1 << 2)
#define CURLWS_CLOSE  (1 << 3)
#define CURLWS_PING   (1 << 4)
#define CURLWS_OFFSET (1 << 5)
46
+
47
+ /*
48
+ * NAME curl_ws_recv()
49
+ *
50
+ * DESCRIPTION
51
+ *
52
+ * Receives data from the websocket connection. Use after successful
53
+ * curl_easy_perform() with CURLOPT_CONNECT_ONLY option.
54
+ */
55
+ CURL_EXTERN CURLcode curl_ws_recv(CURL *curl, void *buffer, size_t buflen,
56
+ size_t *recv,
57
+ const struct curl_ws_frame **metap);
58
+
59
/* Additional flag bit for curl_ws_send() (bit 6). */
#define CURLWS_PONG (1 << 6)
61
+
62
+ /*
63
+ * NAME curl_ws_send()
64
+ *
65
+ * DESCRIPTION
66
+ *
67
+ * Sends data over the websocket connection. Use after successful
68
+ * curl_easy_perform() with CURLOPT_CONNECT_ONLY option.
69
+ */
70
+ CURL_EXTERN CURLcode curl_ws_send(CURL *curl, const void *buffer,
71
+ size_t buflen, size_t *sent,
72
+ curl_off_t fragsize,
73
+ unsigned int flags);
74
+
75
/* Bits of the CURLOPT_WS_OPTIONS bitmask. */
#define CURLWS_RAW_MODE   (1 << 0)
#define CURLWS_NOAUTOPONG (1 << 1)
78
+
79
+ CURL_EXTERN const struct curl_ws_frame *curl_ws_meta(CURL *curl);
80
+
81
+ #ifdef __cplusplus
82
+ }
83
+ #endif
84
+
85
+ #endif /* CURLINC_WEBSOCKETS_H */
@@ -0,0 +1,22 @@
1
module CurlImpersonate
  # Extracts cookies from a raw HTTP header string (the value of
  # Response#headers).
  #
  # The input is split on LF or CRLF and only lines that start with
  # "Set-Cookie:" (compared case-insensitively) are considered. From each of
  # those lines only the leading name=value pair is kept; everything after the
  # first ";" (Path, Domain, Expires, Secure, ...) is discarded.
  #
  # Whitespace around the name and around the value is removed, as RFC 6265
  # section 5.2 requires, so "Set-Cookie: a = 1" yields "a" => "1". A cookie
  # without "=" is stored with an empty string value, and a pair whose name is
  # empty is skipped. When the same name occurs more than once the last
  # occurrence wins (matches the Go reference implementation).
  #
  # @param headers_str [String, nil] raw response headers
  # @return [Hash{String => String}] cookie names mapped to their values;
  #   empty when +headers_str+ is nil or empty
  def self.extract_cookies(headers_str)
    cookies = {}
    return cookies if headers_str.nil? || headers_str.empty?

    prefix = "set-cookie:"
    headers_str.split(/\r?\n/).each do |line|
      next unless line.downcase.start_with?(prefix)

      # Keep only the text before the first ";" - the cookie pair itself.
      pair = line[prefix.length..].split(";", 2).first.to_s.strip
      next if pair.empty?

      # Limit 2 so that "=" characters inside the value are preserved.
      name, value = pair.split("=", 2).map(&:strip)
      next if name.nil? || name.empty?

      cookies[name] = value.to_s
    end

    cookies
  end
end
@@ -0,0 +1,7 @@
1
module CurlImpersonate
  # Value object with three positional members: status_code, body, headers.
  Response = Struct.new(:status_code, :body, :headers)

  # Behaviour is attached by reopening the generated struct class.
  class Response
    # @return [Boolean] true when status_code lies within 200..299 (inclusive)
    def success?
      (200..299).cover?(status_code)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Version of the curl_impersonate gem.
module CurlImpersonate
  # Release identifier as a plain string.
  VERSION = "0.1.1"
end
@@ -0,0 +1,61 @@
1
+ require "curl_impersonate/version"
2
+
3
module CurlImpersonate
  # Exception type of this gem; a direct subclass of StandardError with no
  # behaviour of its own.
  class Error < StandardError
  end
end
6
+
7
+ require "curl_impersonate/response"
8
+ require "curl_impersonate/cookies"
9
+
10
+ # The C extension grabs CurlImpersonate::Error and CurlImpersonate::Response
11
+ # via rb_const_get during Init, so both must be defined before this require.
12
+ require "curl_impersonate/curl_impersonate"
13
+
14
module CurlImpersonate
  # Impersonation target used when the caller does not pass one.
  DEFAULT_IMPERSONATE = "chrome131".freeze
  # Timeout, in seconds, used when the caller does not pass one.
  DEFAULT_TIMEOUT_SEC = 15

  # Normalizes the arguments and hands the request to the native
  # _do_request_native method.
  #
  # All values are coerced before the native call: url, impersonate and
  # post_data via #to_s, header keys and values via #to_s, follow_redirects to
  # a strict true/false, and timeout_sec via Kernel#Integer. The proxy string
  # is split by parse_proxy into a URL without credentials and a "user:pass"
  # string.
  #
  # @param url [#to_s] request URL
  # @param impersonate [#to_s] impersonation target name
  # @param headers [Hash, nil] request headers; nil is treated as no headers
  # @param post_data [#to_s] request body, passed through as a string
  # @param follow_redirects [Object] any truthy value enables redirects
  # @param timeout_sec [Integer, String, Float] must be convertible by
  #   Kernel#Integer, which raises TypeError or ArgumentError otherwise
  # @param proxy [#to_s] proxy URL, optionally with embedded credentials
  # @return whatever _do_request_native returns. NOTE(review): presumably a
  #   CurlImpersonate::Response, with failures raised as
  #   CurlImpersonate::Error - the native method is not visible here.
  def self.do_request(url:,
                      impersonate: DEFAULT_IMPERSONATE,
                      headers: {},
                      post_data: "",
                      follow_redirects: true,
                      timeout_sec: DEFAULT_TIMEOUT_SEC,
                      proxy: "")
    # nil headers are accepted like the other nil-tolerant arguments
    # (post_data, proxy), which already degrade to "" through #to_s.
    string_headers = (headers || {}).each_with_object({}) do |(key, value), acc|
      acc[key.to_s] = value.to_s
    end
    proxy_url, proxy_userpwd = parse_proxy(proxy.to_s)

    _do_request_native(
      url.to_s,
      impersonate.to_s,
      string_headers,
      post_data.to_s,
      follow_redirects ? true : false,
      Integer(timeout_sec),
      proxy_url,
      proxy_userpwd
    )
  end

  # Split "scheme://user:pass@host:port" into ("scheme://host:port", "user:pass").
  # Returns ("", "") for nil or empty input. If there is no "@" the whole
  # string is treated as the proxy URL with empty auth. Port-only or
  # scheme-less inputs are passed through to libcurl, which has its own
  # defaulting logic.
  #
  # The split happens at the LAST "@", so an unescaped "@" inside the password
  # stays part of the credentials.
  #
  # @param proxy [String, nil] proxy specification
  # @return [Array(String, String)] proxy URL without credentials, and the
  #   credentials ("" when none were given)
  def self.parse_proxy(proxy)
    return ["", ""] if proxy.nil? || proxy.empty?

    scheme = ""
    rest = proxy
    if (scheme_end = proxy.index("://"))
      scheme = proxy[0..(scheme_end + 2)] # includes the "://" itself
      rest = proxy[(scheme_end + 3)..]
    end

    at = rest.rindex("@")
    return [proxy, ""] unless at

    [scheme + rest[(at + 1)..], rest[0...at]]
  end
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: curl_impersonate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - TeamMilestone
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-05-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '13.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '13.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake-compiler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.2'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler-dock
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.5'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '3.12'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '3.12'
69
+ description: Generate real-browser TLS/JA3 fingerprints from Ruby by calling the BoringSSL-backed
70
+ libcurl-impersonate C library.
71
+ email:
72
+ - alfonso@team-milestone.io
73
+ executables: []
74
+ extensions:
75
+ - ext/curl_impersonate/extconf.rb
76
+ extra_rdoc_files: []
77
+ files:
78
+ - LICENSE
79
+ - README.md
80
+ - ext/curl_impersonate/curl_impersonate.c
81
+ - ext/curl_impersonate/extconf.rb
82
+ - ext/curl_impersonate/include/curl/curl.h
83
+ - ext/curl_impersonate/include/curl/curlver.h
84
+ - ext/curl_impersonate/include/curl/easy.h
85
+ - ext/curl_impersonate/include/curl/header.h
86
+ - ext/curl_impersonate/include/curl/mprintf.h
87
+ - ext/curl_impersonate/include/curl/multi.h
88
+ - ext/curl_impersonate/include/curl/options.h
89
+ - ext/curl_impersonate/include/curl/stdcheaders.h
90
+ - ext/curl_impersonate/include/curl/system.h
91
+ - ext/curl_impersonate/include/curl/typecheck-gcc.h
92
+ - ext/curl_impersonate/include/curl/urlapi.h
93
+ - ext/curl_impersonate/include/curl/websockets.h
94
+ - lib/curl_impersonate.rb
95
+ - lib/curl_impersonate/cookies.rb
96
+ - lib/curl_impersonate/response.rb
97
+ - lib/curl_impersonate/version.rb
98
+ homepage: https://github.com/TeamMilestone/ruby-curl-impersonate
99
+ licenses:
100
+ - MIT
101
+ metadata:
102
+ homepage_uri: https://github.com/TeamMilestone/ruby-curl-impersonate
103
+ source_code_uri: https://github.com/TeamMilestone/ruby-curl-impersonate
104
+ bug_tracker_uri: https://github.com/TeamMilestone/ruby-curl-impersonate/issues
105
+ post_install_message:
106
+ rdoc_options: []
107
+ require_paths:
108
+ - lib
109
+ required_ruby_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '3.0'
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ requirements: []
120
+ rubygems_version: 3.5.22
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: Ruby bindings for libcurl-impersonate — browser-identical TLS fingerprints
124
+ test_files: []