github-linguist 2.11.0 → 2.11.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f3d2afbe328769fe6d43290ac15e279a1fc839e
4
- data.tar.gz: 6150cd186aa13e933e26c01ea48c3b2416ecf965
3
+ metadata.gz: 20cf20617d8a0934a17836c944818f33e9d1efa2
4
+ data.tar.gz: c9852f9e4df5fca5cbf768e1109ef343f685ec69
5
5
  SHA512:
6
- metadata.gz: c0e276415c90ee3dcd43d6eed9f7357e4cd49838429c82200754a3fb7657ba298de3ba553c11ae481a8aa25735140f6181c1c3c78bc91e94d1d48e1aaf4b1db9
7
- data.tar.gz: 4af5b1dd14a9b5dc42d49accac0241193f92491d1c99b17b2e918689c0e2d6aa3324cfaeebee80b4e1dc22aef51a9cd03b549449698aa1f34822c968c0a152c2
6
+ metadata.gz: 52bfdbda263546ec1075a93c3f5132726ff35e47611e295ff6b29169989cbfef9bf387a94578f74ef4ef1349d518b0b837f107e4008b1bb11a854fecc8488073
7
+ data.tar.gz: fe48a2abc882001aba7c700d8ee44438bbf004f5a7d85394a90aa9330b4b7bbb67b2f4c4f7cd27680dbfd0d124ce5647970b70c520e010a9fbb22dc2e389cd20
@@ -241,7 +241,25 @@ module Linguist
241
241
  def lines
242
242
  @lines ||=
243
243
  if viewable? && data
244
- data.split(/\r\n|\r|\n/, -1)
244
+ # `data` is usually encoded as ASCII-8BIT even when the content has
245
+ # been detected as a different encoding. However, we are not allowed
246
+ # to change the encoding of `data` because we've made the implicit
247
+ # guarantee that each entry in `lines` is encoded the same way as
248
+ # `data`.
249
+ #
250
+ # Instead, we re-encode each possible newline sequence as the
251
+ # detected encoding, then force them back to the encoding of `data`
252
+ # (usually a binary encoding like ASCII-8BIT). This means that the
253
+ # byte sequence will match how newlines are likely encoded in the
254
+ # file, but we don't have to change the encoding of `data` as far as
255
+ # Ruby is concerned. This allows us to correctly parse out each line
256
+ # without changing the encoding of `data`, and
257
+ # also--importantly--without having to duplicate many (potentially
258
+ # large) strings.
259
+ encoded_newlines = ["\r\n", "\r", "\n"].
260
+ map { |nl| nl.encode(encoding).force_encoding(data.encoding) }
261
+
262
+ data.split(Regexp.union(encoded_newlines), -1)
245
263
  else
246
264
  []
247
265
  end
@@ -1,3 +1,3 @@
1
1
  module Linguist
2
- VERSION = "2.11.0"
2
+ VERSION = "2.11.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: github-linguist
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.11.0
4
+ version: 2.11.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - GitHub
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-16 00:00:00.000000000 Z
11
+ date: 2014-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: charlock_holmes