cloud-files 4.27.0__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  Manuel Castro <macastro@princeton.edu>
2
2
  Nico Kemnitz <nkemnitz@princeton.edu>
3
3
  V24 <55334829+umarfarouk98@users.noreply.github.com>
4
+ William Silversmith <william.silvermsith@gmail.com>
4
5
  William Silversmith <william.silversmith@gmail.com>
5
6
  madiganz <madiganz@users.noreply.github.com>
6
7
  ranlu <ranlu@users.noreply.github.com>
@@ -1,52 +1,59 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cloud-files
3
- Version: 4.27.0
3
+ Version: 6.0.0
4
4
  Summary: Fast access to cloud storage and local FS.
5
5
  Home-page: https://github.com/seung-lab/cloud-files/
6
6
  Author: William Silversmith
7
7
  Author-email: ws9@princeton.edu
8
- License: License :: OSI Approved :: BSD License
8
+ License: BSD-3-Clause
9
9
  Classifier: Intended Audience :: Developers
10
10
  Classifier: Development Status :: 4 - Beta
11
- Classifier: License :: OSI Approved :: BSD License
12
11
  Classifier: Programming Language :: Python :: 3
13
- Classifier: Programming Language :: Python :: 3.7
14
- Classifier: Programming Language :: Python :: 3.8
15
12
  Classifier: Programming Language :: Python :: 3.9
16
13
  Classifier: Programming Language :: Python :: 3.10
17
14
  Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
18
17
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
- Requires-Python: >=3.7,<4.0
18
+ Requires-Python: >=3.9,<4.0
20
19
  Description-Content-Type: text/markdown
21
20
  License-File: LICENSE
22
21
  License-File: AUTHORS
23
- Requires-Dist: boto3 (>=1.4.7)
22
+ Requires-Dist: boto3 >=1.4.7
24
23
  Requires-Dist: brotli
25
24
  Requires-Dist: crc32c
26
- Requires-Dist: chardet (>=3.0.4)
25
+ Requires-Dist: chardet >=3.0.4
27
26
  Requires-Dist: click
28
- Requires-Dist: deflate (>=0.2.0)
27
+ Requires-Dist: deflate >=0.2.0
29
28
  Requires-Dist: gevent
30
- Requires-Dist: google-auth (>=1.10.0)
31
- Requires-Dist: google-cloud-core (>=1.1.0)
32
- Requires-Dist: google-cloud-storage (>=1.31.1)
33
- Requires-Dist: google-crc32c (>=1.0.0)
29
+ Requires-Dist: google-auth >=1.10.0
30
+ Requires-Dist: google-cloud-core >=1.1.0
31
+ Requires-Dist: google-cloud-storage >=1.31.1
32
+ Requires-Dist: google-crc32c >=1.0.0
33
+ Requires-Dist: intervaltree
34
+ Requires-Dist: numpy
34
35
  Requires-Dist: orjson
35
36
  Requires-Dist: pathos
36
- Requires-Dist: protobuf (>=3.3.0)
37
- Requires-Dist: requests (>=2.22.0)
38
- Requires-Dist: six (>=1.14.0)
39
- Requires-Dist: tenacity (>=4.10.0)
37
+ Requires-Dist: protobuf >=3.3.0
38
+ Requires-Dist: requests >=2.22.0
39
+ Requires-Dist: six >=1.14.0
40
+ Requires-Dist: tenacity >=4.10.0
40
41
  Requires-Dist: tqdm
41
- Requires-Dist: urllib3 (>=1.26.3)
42
+ Requires-Dist: urllib3 >=1.26.3
42
43
  Requires-Dist: zstandard
43
- Requires-Dist: rsa (>=4.7.2)
44
+ Requires-Dist: rsa >=4.7.2
44
45
  Requires-Dist: fasteners
46
+ Provides-Extra: apache
47
+ Requires-Dist: lxml ; extra == 'apache'
48
+ Provides-Extra: monitoring
49
+ Requires-Dist: psutil ; extra == 'monitoring'
50
+ Requires-Dist: intervaltree ; extra == 'monitoring'
51
+ Requires-Dist: matplotlib ; extra == 'monitoring'
45
52
  Provides-Extra: numpy
46
53
  Requires-Dist: numpy ; extra == 'numpy'
47
54
  Provides-Extra: test
48
55
  Requires-Dist: pytest ; extra == 'test'
49
- Requires-Dist: moto (>=5) ; extra == 'test'
56
+ Requires-Dist: moto >=5 ; extra == 'test'
50
57
 
51
58
  [![PyPI version](https://badge.fury.io/py/cloud-files.svg)](https://badge.fury.io/py/cloud-files) [![Test Suite](https://github.com/seung-lab/cloud-files/workflows/Test%20Suite/badge.svg)](https://github.com/seung-lab/cloud-files/actions?query=workflow%3A%22Test+Suite%22)
52
59
 
@@ -88,12 +95,30 @@ cf.delete(paths, parallel=2) # threaded + two processes
88
95
  boolean = cf.exists('filename')
89
96
  results = cf.exists([ 'filename_1', ... ]) # threaded
90
97
 
98
+ cf.move("a", "gs://bucket/b")
99
+ cf.moves("gs://bucket/", [ ("a", "b") ])
100
+
101
+ cf.touch("example")
102
+ cf.touch([ "example", "example2" ])
103
+
91
104
  # for single files
92
105
  cf = CloudFile("gs://bucket/file1")
93
106
  info = cf.head()
94
107
  binary = cf.get()
108
+ obj = cf.get_json()
95
109
  cf.put(binary)
110
+ cf.put_json()
96
111
  cf[:30] # get first 30 bytes of file
112
+
113
+ num_bytes = cf.size() # get size in bytes (also in head)
114
+ exists = cf.exists() # true or false
115
+ cf.delete() # deletes the file
116
+ cf.touch() # create the file if it doesn't exist
117
+ cf.move("gs://example/destination/directory") # copy then delete source
118
+ cf.transfer_from("gs://example/source/file.txt") # copies file efficiently
119
+ cf.transfer_to("gs://example/dest/file.txt") # copies file efficiently
120
+
121
+ path = cf.join([ path1, path2, path3 ]) # use the appropriate path separator
97
122
  ```
98
123
 
99
124
  CloudFiles was developed to access files from object storage without ever touching disk. The goal was to reliably and rapidly access a petabyte of image data broken down into tens to hundreds of millions of files being accessed in parallel across thousands of cores. CloudFiles has been used to process dozens of images, many of which were in the hundreds of terabyte range. It has reliably read and written tens of billions of files to date.
@@ -119,6 +144,7 @@ CloudFiles was developed to access files from object storage without ever touchi
119
144
  ```bash
120
145
  pip install cloud-files
121
146
  pip install cloud-files[test] # to enable testing with pytest
147
+ pip install cloud-files[monitoring] # enable plotting network performance
122
148
  ```
123
149
 
124
150
  If you run into trouble installing dependencies, make sure you're using at least Python 3.9 and you have updated pip. On Linux, some dependencies require manylinux2010 or manylinux2014 binaries which earlier versions of pip do not search for. MacOS, Linux, and Windows are supported platforms.
@@ -171,7 +197,9 @@ You can create the `google-secret.json` file [here](https://console.cloud.google
171
197
 
172
198
  ## API Documentation
173
199
 
174
- Note that the "Cloud Costs" mentioned below are current as of June 2020 and are subject to change. As of this writing, S3 and Google use identical cost structures for these operations.
200
+ Note that the "Cloud Costs" mentioned below are current as of June 2020 and are subject to change. As of this writing, S3 and Google use identical cost structures for these operations.
201
+
202
+ `CloudFile` is a more intuitive version of `CloudFiles` designed for managing single files instead of groups of files. See examples above. There is an analogous method for each `CloudFiles` method (where it makes sense).
175
203
 
176
204
  ### Constructor
177
205
  ```python
@@ -231,6 +259,10 @@ binary = cf['filename'][0:5] # same result, fetches 11 bytes
231
259
  >> b'hello' # represents byte range 0-4 inclusive of filename
232
260
 
233
261
  binaries = cf[:100] # download the first 100 files
262
+
263
+ # Get the TransmissionMonitor object that records
264
+ # the flight time of each file.
265
+ binaries, tm = cf.get(..., return_recording=True)
234
266
  ```
235
267
 
236
268
  `get` supports several different styles of input. The simplest takes a scalar filename and returns the contents of that file. However, you can also specify lists of filenames, a byte range request, and lists of byte range requests. You can provide a generator or iterator as input as well. Order is not guaranteed.
@@ -260,6 +292,10 @@ cf.puts([{
260
292
  cf.puts([ (path, content), (path, content) ], compression='gzip')
261
293
  cf.put_jsons(...)
262
294
 
295
+ # Get the TransmissionMonitor object that records the
296
+ # flight times of each object.
297
+ _, tm = cf.puts(..., return_recording=True)
298
+
263
299
  # Definition of put, put_json is identical
264
300
  def put(
265
301
  self,
@@ -464,6 +500,16 @@ cloudfiles -p 2 cp --progress -r s3://bkt/ gs://bkt2/
464
500
  cloudfiles cp -c br s3://bkt/file.txt gs://bkt2/
465
501
  # decompress
466
502
  cloudfiles cp -c none s3://bkt/file.txt gs://bkt2/
503
+ # save chart of file flight times
504
+ cloudfiles cp --flight-time s3://bkt/file.txt gs://bkt2/
505
+ # save a chart of estimated bandwidth usage from these files alone
506
+ cloudfiles cp --io-rate s3://bkt/file.txt gs://bkt2/
507
+ # save a chart of measured bandwidth usage for the machine
508
+ cloudfiles cp --machine-io-rate s3://bkt/file.txt gs://bkt2/
509
+ # move or rename files
510
+ cloudfiles mv s3://bkt/file.txt gs://bkt2/
511
+ # create an empty file if not existing
512
+ cloudfiles touch s3://bkt/empty.txt
467
513
  # pass from stdin (use "-" for source argument)
468
514
  find some_dir | cloudfiles cp - s3://bkt/
469
515
  # resumable transfers
@@ -519,6 +565,40 @@ cloudfiles alias rm example # remove example://
519
565
 
520
566
  The alias file is only accessed (and cached) if CloudFiles encounters an unknown protocol. If you stick to default protocols and use the syntax `s3://https://example.com/` for alternative endpoints, you can still use CloudFiles in environments without filesystem access.
521
567
 
568
+ ## Performance Monitoring
569
+
570
+ CloudFiles now comes with two tools inside of `cloudfiles.monitoring` for measuring the performance of transfer operations both via the CLI and the programmatic interface.
571
+
572
+ A `TransmissionMonitor` object is created during each download or upload (e.g. `cf.get` or `cf.puts`) call. You can access this object by using the `return_recording=True` flag. This object saves the flight times of each object along with its size in an interval tree data structure. It comes with methods for estimating the peak bits per second and can plot both time of flight and the estimated transfer rates (assuming the transfer is evenly divided over the flight of an object, an assumption that is not always true). This allows you to estimate the contribution of a given CloudFiles operation to a machine's network IO.
573
+
574
+ ```python
575
+ from cloudfiles import CloudFiles
576
+
577
+ ...
578
+
579
+ results, tm = cf.get([ ... some files ... ], return_recording=True)
580
+
581
+ value = tm.peak_Mbps() # estimated peak transfer rate
582
+ value = tm.total_Mbps() # estimated average transfer rate
583
+ tm.plot_gantt() # time of flight chart
584
+ tm.plot_histogram() # transfer rate chart
585
+ ```
586
+
587
+ A second object, `IOSampler`, can sample the OS network counters using a background thread and provides a global view of the machine's network performance during the life of the transfer. It is enabled on the CLI for the `cp` command when the `--machine-io-rate` flag is enabled, but must be manually started programmatically. This is to avoid accidentally starting unnecessary sampling threads. The samples are accumulated into a circular buffer, so make sure to set the buffer length long enough for your points of interest to be captured.
588
+
589
+ ```python
590
+ from cloudfiles.monitoring import IOSampler
591
+
592
+ sampler = IOSampler(buffer_sec=600, interval=0.25)
593
+ sampler.start_sampling()
594
+
595
+ ...
596
+
597
+
598
+ sampler.stop_sampling()
599
+ sampler.plot_histogram()
600
+ ```
601
+
522
602
  ## Credits
523
603
 
524
604
  CloudFiles is derived from the [CloudVolume.Storage](https://github.com/seung-lab/cloud-volume/tree/master/cloudvolume/storage) system.
@@ -0,0 +1,27 @@
1
+ cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
2
+ cloudfiles/cloudfiles.py,sha256=SlSkGKCBdnne7vE7Y5_tvsMcFNx_coUWMtDajEnbHfY,58093
3
+ cloudfiles/compression.py,sha256=WXJHnoNLJ_NWyoY9ygZmFA2qMou35_9xS5dzF7-2H-M,6262
4
+ cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
5
+ cloudfiles/exceptions.py,sha256=N0oGQNG-St6RvnT8e5p_yC_E61q2kgAe2scwAL0F49c,843
6
+ cloudfiles/gcs.py,sha256=unqu5KxGKaPq6N4QeHSpCDdtnK1BzPOAerTZ8FLt2_4,3820
7
+ cloudfiles/interfaces.py,sha256=Kg5t2-tWD0EoJ0qK-wid7zdxLgs7q0mDduPxAzyUUL0,47499
8
+ cloudfiles/lib.py,sha256=HHjCvjmOjA0nZWSvHGoSeYpxqd6FAG8xk8LM212LAUA,5382
9
+ cloudfiles/monitoring.py,sha256=N5Xq0PYZK1OxoYtwBFsnnfaq7dElTgY8Rn2Ez_I3aoo,20897
10
+ cloudfiles/paths.py,sha256=FLdShqkOg1XlkHurU9eiKzLadx2JFYG1EmleCpOFsYQ,12229
11
+ cloudfiles/resumable_tools.py,sha256=NyuSoGh1SaP5akrHCpd9kgy2-JruEWrHW9lvJxV7jpE,6711
12
+ cloudfiles/scheduler.py,sha256=ioqBT5bMPCVHEHlnL-SW_wHulxGgjeThiKHlnaDOydo,3831
13
+ cloudfiles/secrets.py,sha256=IuYKHmmvFmQTyG2Zcmbx7e8U2LIv-woG5d8qyOlyCD8,5431
14
+ cloudfiles/test.py,sha256=jktBhzKLU448QPs5H7QVnTvpYMo4lBBWG1aE7-uzVGQ,678
15
+ cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
16
+ cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
17
+ cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
18
+ cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
19
+ cloudfiles_cli/cloudfiles_cli.py,sha256=jHbQasZb5DB_g8nGxS3Y0ekAdIPcSVrHN5mvEedUl0k,38908
20
+ cloud_files-6.0.0.dist-info/AUTHORS,sha256=BFVmobgAhaVFI5fqbuqAY5XmBQxe09ZZAsAOTy87hKQ,318
21
+ cloud_files-6.0.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
22
+ cloud_files-6.0.0.dist-info/METADATA,sha256=SJw22OqzxSN3BvyacUjQgJ1trdAWs4mJv9hC0LYKQZk,30530
23
+ cloud_files-6.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
24
+ cloud_files-6.0.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
25
+ cloud_files-6.0.0.dist-info/pbr.json,sha256=P1Yg68JWbSeMCxsbPR-QhAUj2p8rzNNuqgMHtcFAveo,46
26
+ cloud_files-6.0.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
27
+ cloud_files-6.0.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.0)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1 @@
1
+ {"git_version": "38a2b59", "is_release": true}