github-linguist 2.11.0 → 2.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/linguist/blob_helper.rb +19 -1
- data/lib/linguist/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20cf20617d8a0934a17836c944818f33e9d1efa2
|
4
|
+
data.tar.gz: c9852f9e4df5fca5cbf768e1109ef343f685ec69
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52bfdbda263546ec1075a93c3f5132726ff35e47611e295ff6b29169989cbfef9bf387a94578f74ef4ef1349d518b0b837f107e4008b1bb11a854fecc8488073
|
7
|
+
data.tar.gz: fe48a2abc882001aba7c700d8ee44438bbf004f5a7d85394a90aa9330b4b7bbb67b2f4c4f7cd27680dbfd0d124ce5647970b70c520e010a9fbb22dc2e389cd20
|
data/lib/linguist/blob_helper.rb
CHANGED
@@ -241,7 +241,25 @@ module Linguist
|
|
241
241
|
def lines
|
242
242
|
@lines ||=
|
243
243
|
if viewable? && data
|
244
|
-
data
|
244
|
+
# `data` is usually encoded as ASCII-8BIT even when the content has
|
245
|
+
# been detected as a different encoding. However, we are not allowed
|
246
|
+
# to change the encoding of `data` because we've made the implicit
|
247
|
+
# guarantee that each entry in `lines` is encoded the same way as
|
248
|
+
# `data`.
|
249
|
+
#
|
250
|
+
# Instead, we re-encode each possible newline sequence as the
|
251
|
+
# detected encoding, then force them back to the encoding of `data`
|
252
|
+
# (usually a binary encoding like ASCII-8BIT). This means that the
|
253
|
+
# byte sequence will match how newlines are likely encoded in the
|
254
|
+
# file, but we don't have to change the encoding of `data` as far as
|
255
|
+
# Ruby is concerned. This allows us to correctly parse out each line
|
256
|
+
# without changing the encoding of `data`, and
|
257
|
+
# also--importantly--without having to duplicate many (potentially
|
258
|
+
# large) strings.
|
259
|
+
encoded_newlines = ["\r\n", "\r", "\n"].
|
260
|
+
map { |nl| nl.encode(encoding).force_encoding(data.encoding) }
|
261
|
+
|
262
|
+
data.split(Regexp.union(encoded_newlines), -1)
|
245
263
|
else
|
246
264
|
[]
|
247
265
|
end
|
data/lib/linguist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.11.0
|
4
|
+
version: 2.11.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GitHub
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: charlock_holmes
|