ServiceX_DID_Finder_lib 2.0.3__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,15 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ServiceX_DID_Finder_lib
3
- Version: 2.0.3
3
+ Version: 3.0.0
4
4
  Summary: ServiceX DID Library Routines
5
5
  Author: Gordon Watts
6
6
  Author-email: gwatts@uw.edu
7
- Requires-Python: >=3.8,<4.0
7
+ Requires-Python: >=3.8.1,<4.0.0
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.8
10
9
  Classifier: Programming Language :: Python :: 3.9
11
10
  Classifier: Programming Language :: Python :: 3.10
12
11
  Classifier: Programming Language :: Python :: 3.11
13
12
  Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: Celery (>=5.4,<6.0)
14
14
  Requires-Dist: make-it-sync (>=1.0.0,<2.0.0)
15
- Requires-Dist: pika (==1.1.0)
16
15
  Requires-Dist: requests (>=2.25.0,<3.0.0)
@@ -1,21 +1,22 @@
1
1
  [tool.poetry]
2
2
  name = "ServiceX_DID_Finder_lib"
3
- version = "2.0.3"
3
+ version = "3.0.0"
4
4
  description = "ServiceX DID Library Routines"
5
5
  authors = ["Gordon Watts <gwatts@uw.edu>"]
6
6
 
7
7
  [tool.poetry.dependencies]
8
- python = "^3.8"
9
- pika = "1.1.0"
8
+ python = "^3.8.1"
10
9
  make-it-sync = "^1.0.0"
11
10
  requests = "^2.25.0"
11
+ Celery= "^5.4"
12
+
12
13
 
13
14
  [tool.poetry.group.dev]
14
15
  optional = true
15
16
 
16
17
  [tool.poetry.group.dev.dependencies]
17
- pytest = "^7.4"
18
- flake8 = "^3.9.1"
18
+ pytest = "^8.2"
19
+ flake8 = "^7.1"
19
20
  pytest-mock = "^3.12.0"
20
21
  coverage = "^7.4.0"
21
22
  responses = "^0.14.0"
@@ -0,0 +1 @@
1
+ from .did_finder_app import DIDFinderApp # NOQA F401
@@ -0,0 +1,80 @@
1
+ # Copyright (c) 2024, IRIS-HEP
2
+ # All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ #
7
+ # * Redistributions of source code must retain the above copyright notice, this
8
+ # list of conditions and the following disclaimer.
9
+ #
10
+ # * Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ #
14
+ # * Neither the name of the copyright holder nor the names of its
15
+ # contributors may be used to endorse or promote products derived from
16
+ # this software without specific prior written permission.
17
+ #
18
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ from typing import List, Dict, Any, Union
29
+
30
+ from servicex_did_finder_lib.did_summary import DIDSummary
31
+ from servicex_did_finder_lib.servicex_adaptor import ServiceXAdapter
32
+
33
+
34
class Accumulator:
    """Track or cache files depending on the mode we are operating in.

    File records are collected with :meth:`add` and forwarded to ServiceX with
    :meth:`send_on`, which also registers every forwarded record with the
    DID summary.
    """

    def __init__(self, sx: "ServiceXAdapter", sum: "DIDSummary"):
        """
        :param sx: Adapter whose ``put_file_add_bulk`` receives the files
        :param sum: Summary whose ``add_file`` is called for every file sent
        """
        # ``sum`` shadows the builtin, but it is part of the public signature
        # (keyword callers), so the name is kept.
        self.servicex = sx
        self.summary = sum
        self.file_cache: List[Dict[str, Any]] = []

    def add(self, file_info: Union[Dict[str, Any], List[Dict[str, Any]]]):
        """
        Cache file information until it is sent on
        :param file_info: The file information to track; can be a single
                          record (dict) or a list of records
        :raises ValueError: if ``file_info`` is neither a dict nor a list
        """
        if isinstance(file_info, dict):
            self.file_cache.append(file_info)
        elif isinstance(file_info, list):
            self.file_cache.extend(file_info)
        else:
            raise ValueError("Invalid input: expected a dictionary or a list of dictionaries")

    @property
    def cache_len(self) -> int:
        """Number of file records currently cached."""
        return len(self.file_cache)

    def send_on(self, count):
        """
        Send the accumulated files and empty the cache
        :param count: The number of files to send. Any negative value
                      (conventionally -1) or None sends all of them
        """
        # Sort the list to ensure reproducibility of the selected subset
        files = sorted(self.file_cache, key=lambda x: x["paths"])

        # Only a non-negative count limits the selection. A negative count
        # other than -1 must not reach the slice: files[:-2] would silently
        # drop the last two records.
        if count is not None and count >= 0:
            files = files[:count]
        self.send_bulk(files)

        # Records that were not selected are discarded along with the rest
        self.file_cache.clear()

    def send_bulk(self, file_list: List[Dict[str, Any]]):
        """
        Does a bulk put of files
        :param file_list: The list of files to send
        """
        # Register each file with the summary before the single bulk put
        for ifl in file_list:
            self.summary.add_file(ifl)
        self.servicex.put_file_add_bulk(file_list)
@@ -0,0 +1,166 @@
1
+ # Copyright (c) 2022, IRIS-HEP
2
+ # All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ #
7
+ # * Redistributions of source code must retain the above copyright notice, this
8
+ # list of conditions and the following disclaimer.
9
+ #
10
+ # * Redistributions in binary form must reproduce the above copyright notice,
11
+ # this list of conditions and the following disclaimer in the documentation
12
+ # and/or other materials provided with the distribution.
13
+ #
14
+ # * Neither the name of the copyright holder nor the names of its
15
+ # contributors may be used to endorse or promote products derived from
16
+ # this software without specific prior written permission.
17
+ #
18
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22
+ # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
+ # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24
+ # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26
+ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27
+ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
+ import logging
29
+ from datetime import datetime
30
+ from typing import Any, Generator, Callable, Dict, Optional
31
+
32
+ from celery import Celery, Task
33
+
34
+ from servicex_did_finder_lib.accumulator import Accumulator
35
+ from servicex_did_finder_lib.did_logging import initialize_root_logger
36
+ from servicex_did_finder_lib.did_summary import DIDSummary
37
+ from servicex_did_finder_lib.servicex_adaptor import ServiceXAdapter
38
+ from servicex_did_finder_lib.util_uri import parse_did_uri
39
+
40
# The type for the callback method to handle DID's, supplied by the user.
# It is called with three positional arguments:
#   1. The DID to process
#   2. A dictionary of information about the DID request
#   3. A dictionary of arguments passed to the DID finder
# and returns a generator of dictionaries.
UserDIDHandler = Callable[
    [str, Dict[str, Any], Dict[str, Any]],
    Generator[Dict[str, Any], None, None],
]


# Module logger; the NullHandler means this library adds no output handler of
# its own.
__logging = logging.getLogger(__name__)
__logging.addHandler(logging.NullHandler())
53
+
54
+
55
class DIDFinderTask(Task):
    """
    A Celery task that will process a single DID request. This task will
    call the user supplied DID finder to get the list of files associated
    with the DID, and then send that list to ServiceX for processing.
    """

    def __init__(self):
        super().__init__()
        self.logger = logging.getLogger(__name__)
        # All instances share the same named logger, so only attach a
        # NullHandler when one is not already present; otherwise every new
        # task instance would add another duplicate handler.
        if not any(isinstance(h, logging.NullHandler) for h in self.logger.handlers):
            self.logger.addHandler(logging.NullHandler())

    def do_lookup(self, did: str, dataset_id: int, endpoint: str,
                  user_did_finder: UserDIDHandler):
        """
        Perform the DID lookup for the given DID. This will call the user supplied
        DID finder to get the list of files associated with the DID, and then send
        that list to ServiceX for processing.
        After all of the files have been sent, send a message to ServiceX indicating
        that the fileset is complete. That message is sent even when the lookup
        fails; the failure is logged rather than re-raised.
        Args:
            did: The DID to process
            dataset_id: The dataset ID for the request
            endpoint: The ServiceX endpoint to send the request to
            user_did_finder: The user supplied DID finder to call to get the list of files
        """

        self.logger.info(
            f"Received DID request {did}",
            extra={"dataset_id": dataset_id}
        )

        servicex = ServiceXAdapter(dataset_id=dataset_id, endpoint=endpoint)

        info = {
            "dataset-id": dataset_id,
        }

        start_time = datetime.now()

        summary = DIDSummary(did)
        acc = Accumulator(servicex, summary)

        try:
            # Parse inside the try block: a malformed DID must still reach the
            # finally clause so ServiceX is told the fileset is complete.
            did_info = parse_did_uri(did)

            for file_info in user_did_finder(did_info.did, info, self.app.did_finder_args):
                acc.add(file_info)

            acc.send_on(did_info.file_count)
        except Exception:
            # Deliberate best-effort: log with traceback and fall through to
            # the completion report instead of failing the task.
            self.logger.error(
                f"Error processing DID {did}",
                extra={"dataset_id": dataset_id},
                exc_info=True
            )
        finally:
            elapsed_time = int((datetime.now() - start_time).total_seconds())
            servicex.put_fileset_complete(
                {
                    "files": summary.file_count,
                    "files-skipped": summary.files_skipped,
                    "total-events": summary.total_events,
                    "total-bytes": summary.total_bytes,
                    "elapsed-time": elapsed_time,
                }
            )
120
+
121
+
122
class DIDFinderApp(Celery):
    """
    The main application for a DID finder. This is the Celery application on
    which the DID lookup tasks are registered.
    """

    def __init__(self, did_finder_name: str,
                 did_finder_args: Optional[Dict[str, Any]] = None,
                 *args, **kwargs):
        """
        Initialize the DID finder application
        Args:
            did_finder_name: The name of the DID finder.
            did_finder_args: The parsed command line arguments and other objects you want
                to make available to the tasks
            *args: Extra positional arguments forwarded to Celery
            **kwargs: Extra keyword arguments forwarded to Celery.
                broker_connection_retry_on_startup defaults to True but may be
                overridden here.
        """

        self.name = did_finder_name
        initialize_root_logger(self.name)

        # Use setdefault instead of a hard-coded keyword so a caller who passes
        # broker_connection_retry_on_startup does not trigger a
        # "multiple values for keyword argument" TypeError.
        kwargs.setdefault("broker_connection_retry_on_startup", True)
        super().__init__(f"did_finder_{self.name}", *args, **kwargs)

        # Cache the args in the App, so they are accessible to the tasks
        self.did_finder_args = did_finder_args

    def did_lookup_task(self, name):
        """
        Decorator to create a new task to handle a DID lookup request without
        needing to know about Celery tasks.
        Usage:
            @app.did_lookup_task(name="did_finder_cern_opendata.lookup_dataset")
            def lookup_dataset(self, did: str, dataset_id: int, endpoint: str) -> None:
                self.do_lookup(did=did, dataset_id=dataset_id,
                               endpoint=endpoint, user_did_finder=find_files)

        Args:
            name: The name of the task
        """
        def decorator(func):
            # bind=True passes the DIDFinderTask instance as the first
            # argument, which is the ``self`` of the decorated function.
            @self.task(base=DIDFinderTask, bind=True, name=name)
            def wrapper(*args, **kwargs):
                return func(*args, **kwargs)
            return wrapper
        return decorator
@@ -51,23 +51,6 @@ class ServiceXAdapter:
51
51
  'file_events': file_info['file_events']
52
52
  }
53
53
 
54
- def put_file_add(self, file_info):
55
- success = False
56
- attempts = 0
57
- while not success and attempts < MAX_RETRIES:
58
- try:
59
- mesg = self._create_json(file_info)
60
- requests.put(f"{self.endpoint}{self.dataset_id}/files", json=mesg)
61
- self.logger.info("adding file:", extra=file_info)
62
- success = True
63
- except requests.exceptions.ConnectionError:
64
- self.logger.exception(f'Connection error to ServiceX App. Will retry '
65
- f'(try {attempts} out of {MAX_RETRIES}')
66
- attempts += 1
67
- if not success:
68
- self.logger.error(f'After {attempts} tries, failed to send ServiceX App a put_file '
69
- f'message: {str(file_info)} - Ignoring error.')
70
-
71
54
  def put_file_add_bulk(self, file_list, chunk_length=300):
72
55
  # we first chunk up file_list as it can be very large in
73
56
  # case there are a lot of replicas and a lot of files.
@@ -1,4 +0,0 @@
1
- __version__ = '1.0.0a1'
2
-
3
- from .communication import start_did_finder, \
4
- add_did_finder_cnd_arguments # NOQA