akita-har_logger 0.2.3 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5a4d073bcb2347b0527e3dab531c77770a29c29326bf76cb2ca09724206af8e5
4
- data.tar.gz: 2fc8b6e0b4721699f87a08ba098b3d6137835b14c01eeb73b945a453a98267f2
3
+ metadata.gz: b2030633ff794a611ceb1fddba8b274e5617765c0644dcdebbe6743515dd0576
4
+ data.tar.gz: c3b1eb76987d0db9dcf63830a67882cbdfbaceeba937c1a9a20a874ddc4efd42
5
5
  SHA512:
6
- metadata.gz: f07983494e000f9ed64ffdfe346869bdd0b574db21210a39de0199d1df81f877262c2623c72ca918c792cff77a29872224a5ba3d5f3ac12691381d8caf89bc17
7
- data.tar.gz: 513be52cfaf7f114cb95ed33df45528336db9f9945ac5252db12a9abfe30103308cd1d7d92585ec6ac8897c231129e0d83cee596a3df3034946c88b5c59dcde6
6
+ metadata.gz: 7b7e2cbf0e7580c044ca178d58af63fc41059318475f033314bf9fbba9ce3546da02808b2c237c6cef6ada40d6b6eebaf905ed2201f943367deea8fc49342d5b
7
+ data.tar.gz: c8637bc4282a1571b0366e1dbff4948771a7d10913b26c7df595996b4096dcd18e8119448cf3f578c9d33f3acf12034c05d92b62ce1900eff34b4c5adb71fe9f
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- akita-har_logger (0.2.3)
4
+ akita-har_logger (0.2.7)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -4,13 +4,32 @@ module Akita
4
4
  module HarLogger
5
5
  class HarUtils
6
6
  # Rack apparently uses 8-bit ASCII for everything, even when the string
7
- # is not 8-bit ASCII. This reinterprets the given string as UTF-8.
7
+ # is not 8-bit ASCII. This reinterprets 8-bit ASCII strings as UTF-8.
8
+ #
9
+ # If we are unable to do this reinterpretation, return the string
10
+ # unchanged, but log a warning that points to the caller.
8
11
  def self.fixEncoding(v)
9
- if v == nil || v.encoding == Encoding::UTF_8 then
10
- v
11
- else
12
- String.new(v).force_encoding(Encoding::UTF_8)
12
+ if v == nil then
13
+ return v
13
14
  end
15
+
16
+ if !(v.is_a? String) then
17
+ Rails.logger.warn "[#{caller_locations(1, 1)}] fixEncoding was not given a string. This might cause JSON serialization to fail."
18
+ return v
19
+ end
20
+
21
+ # Only re-interpret 8-bit ASCII.
22
+ if v.encoding != Encoding::ASCII_8BIT then
23
+ return v
24
+ end
25
+
26
+ forced = String.new(v).force_encoding(Encoding::UTF_8)
27
+ if forced.valid_encoding? then
28
+ return forced
29
+ end
30
+
31
+ Rails.logger.warn "[#{caller_locations(1, 1)}] Unable to fix encoding: not a valid UTF-8 string. This will likely cause JSON serialization to fail."
32
+ v
14
33
  end
15
34
 
16
35
  # Converts a Hash into a list of Hash objects. Each entry in the given
@@ -85,7 +85,7 @@ module Akita
85
85
  return Encoding::ISO_8859_1
86
86
  end
87
87
 
88
- Encoding.default_external
88
+ Encoding::UTF_8
89
89
  end
90
90
 
91
91
  # Obtains the posted data from an HTTP environment.
@@ -117,8 +117,27 @@ module Akita
117
117
  # body when the request specifies UTF-8. Reinterpret the content
118
118
  # body according to what the request says it is, and re-encode into
119
119
  # UTF-8.
120
- result[:text] = req.body.string.encode(Encoding::UTF_8,
121
- getPostDataCharSet(env))
120
+ #
121
+ # Gracefully handle any characters that are invalid in the source
122
+ # encoding and characters that have no UTF-8 representation by
123
+ # replacing with '?'. Log a warning when this happens.
124
+ source = req.body.string.force_encoding(getPostDataCharSet(env))
125
+ utf8EncodingSuccessful = false
126
+ if source.valid_encoding? then
127
+ begin
128
+ result[:text] = source.encode(Encoding::UTF_8)
129
+ utf8EncodingSuccessful = true
130
+ rescue Encoding::UndefinedConversionError
131
+ Rails.logger.warn "[#{caller_locations(0, 1)}] Unable to losslessly convert request body from #{source.encoding} to UTF-8. Characters undefined in UTF-8 will be replaced with '?'."
132
+ end
133
+ else
134
+ Rails.logger.warn "[#{caller_locations(0, 1)}] Request body is not valid #{source.encoding}. Invalid characters and characters undefined in UTF-8 will be replaced with '?'."
135
+ end
136
+
137
+ if !utf8EncodingSuccessful then
138
+ result[:text] = source.encode(Encoding::UTF_8,
139
+ invalid: :replace, undef: :replace, replace: '?')
140
+ end
122
141
  end
123
142
 
124
143
  result
@@ -144,7 +163,7 @@ module Akita
144
163
  # for the CRLF on the blank line.
145
164
  getHeaders(env).reduce(line_length + 2) { |accum, entry|
146
165
  # Header-Name: header value<CR><LF>
147
- accum + entry[:name].length + 2 + entry[:value].length + 2
166
+ accum + entry[:name].length + 2 + entry[:value].to_s.length + 2
148
167
  }
149
168
  end
150
169
 
@@ -25,7 +25,7 @@ module Akita
25
25
 
26
26
  # Obtains the status text corresponding to a status code.
27
27
  def getStatusText(status)
28
- Rack::Utils::HTTP_STATUS_CODES[status]
28
+ HarUtils.fixEncoding(Rack::Utils::HTTP_STATUS_CODES[status])
29
29
  end
30
30
 
31
31
  # Obtains the HTTP version in the response.
@@ -65,8 +65,8 @@ module Akita
65
65
  if match then cookie_value = match[1] end
66
66
 
67
67
  result << {
68
- name: cookie_name,
69
- value: cookie_value,
68
+ name: HarUtils.fixEncoding(cookie_name),
69
+ value: HarUtils.fixEncoding(cookie_value),
70
70
  }
71
71
  }
72
72
 
@@ -74,19 +74,56 @@ module Akita
74
74
  end
75
75
 
76
76
  def getContent(headers, body)
77
- # XXX Handle compression & encoding.
77
+ # XXX Handle compression
78
+ # XXX Figure out how to properly join together multi-part bodies.
78
79
 
80
+ # Try to convert the body into UTF-8. If this fails, assume the body is
81
+ # binary data.
82
+ # XXX TODO Take charset part of Content-Type header into account.
79
83
  text = +""
84
+ haveBinaryData = false
80
85
  body.each { |part|
81
- # XXX Figure out how to join together multi-part bodies.
82
- text << part;
86
+ partStr = part.to_s
87
+
88
+ if partStr.encoding == Encoding::ASCII_8BIT then
89
+ # Have 8-bit ASCII data. Try to interpret as UTF-8. If this fails,
90
+ # treat as binary data.
91
+ forced = String.new(partStr).force_encoding(Encoding::UTF_8)
92
+ if forced.valid_encoding? then
93
+ text << forced
94
+ next
95
+ end
96
+
97
+ haveBinaryData = true
98
+ break
99
+ end
100
+
101
+ if !partStr.valid_encoding? then
102
+ # Source encoding is not valid. Treat as binary data.
103
+ haveBinaryData = true
104
+ break
105
+ end
106
+
107
+ # Try to re-encode as UTF-8. If this fails, treat as binary data.
108
+ begin
109
+ text << partStr.encode(Encoding::UTF_8)
110
+ rescue Encoding::UndefinedConversionError
111
+ haveBinaryData = true
112
+ break
113
+ end
83
114
  }
84
115
 
116
+ if haveBinaryData then
117
+ # TODO Encode binary body data with base64.
118
+ # XXX Omit for now.
119
+ text = ""
120
+ end
121
+
85
122
  {
86
123
  size: getBodySize(body),
87
124
 
88
125
  # XXX What to use when no Content-Type is given?
89
- mimeType: headers['Content-Type'],
126
+ mimeType: HarUtils.fixEncoding(headers['Content-Type']),
90
127
 
91
128
  text: text,
92
129
  }
@@ -95,7 +132,9 @@ module Akita
95
132
  def getRedirectUrl(headers)
96
133
  # Use the "Location" header if it exists. Otherwise, based on some HAR
97
134
  # examples found online, it looks like an empty string is used.
98
- headers.key?('Location') ? headers['Location'] : ''
135
+ headers.key?('Location') ?
136
+ HarUtils.fixEncoding(headers['Location']) :
137
+ ''
99
138
  end
100
139
 
101
140
  def getHeadersSize(env, status, headers)
@@ -114,7 +153,7 @@ module Akita
114
153
  # for the CRLF on the blank line.
115
154
  headers.reduce(status_length + 2) { |accum, (k, v)|
116
155
  # Header-Name: header value<CR><LF>
117
- accum + k.length + 2 + v.length + 2
156
+ accum + k.length + 2 + v.to_s.length + 2
118
157
  }
119
158
  end
120
159
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Akita
4
4
  module HarLogger
5
- VERSION = "0.2.3"
5
+ VERSION = "0.2.7"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: akita-har_logger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jed Liu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-11 00:00:00.000000000 Z
11
+ date: 2021-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec