heurist-api 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of heurist-api might be problematic. Click here for more details.

Files changed (80) hide show
  1. heurist/__init__.py +1 -0
  2. heurist/api/__init__.py +0 -0
  3. heurist/api/client.py +122 -0
  4. heurist/api/connection.py +71 -0
  5. heurist/api/constants.py +19 -0
  6. heurist/api/credentials.py +71 -0
  7. heurist/api/exceptions.py +45 -0
  8. heurist/api/url_builder.py +148 -0
  9. heurist/api/utils.py +24 -0
  10. heurist/cli/__init__.py +0 -0
  11. heurist/cli/__main__.py +227 -0
  12. heurist/cli/load.py +55 -0
  13. heurist/cli/records.py +49 -0
  14. heurist/cli/schema.py +94 -0
  15. heurist/database/__init__.py +3 -0
  16. heurist/database/basedb.py +125 -0
  17. heurist/database/database.py +96 -0
  18. heurist/models/__init__.py +0 -0
  19. heurist/models/dynamic/__init__.py +3 -0
  20. heurist/models/dynamic/annotation.py +143 -0
  21. heurist/models/dynamic/create_model.py +82 -0
  22. heurist/models/dynamic/date.py +61 -0
  23. heurist/models/dynamic/type.py +96 -0
  24. heurist/models/structural/DetailTypes.py +34 -0
  25. heurist/models/structural/RecStructure.py +27 -0
  26. heurist/models/structural/RecTypeGroups.py +27 -0
  27. heurist/models/structural/RecTypes.py +27 -0
  28. heurist/models/structural/Terms.py +27 -0
  29. heurist/models/structural/__init__.py +19 -0
  30. heurist/models/structural/dty.py +121 -0
  31. heurist/models/structural/hml_structure.py +36 -0
  32. heurist/models/structural/rst.py +141 -0
  33. heurist/models/structural/rtg.py +25 -0
  34. heurist/models/structural/rty.py +81 -0
  35. heurist/models/structural/trm.py +34 -0
  36. heurist/models/structural/utils.py +53 -0
  37. heurist/schema/__init__.py +27 -0
  38. heurist/schema/models.py +70 -0
  39. heurist/schema/rel_to_dict.py +39 -0
  40. heurist/sql/__init__.py +21 -0
  41. heurist/sql/joinRecordTypeIDNameByGroupType.sql +10 -0
  42. heurist/sql/joinRecordTypeMetadata.sql +17 -0
  43. heurist/sql/selectRecordTypeSchema.sql +51 -0
  44. heurist/sql/sql_safety.py +101 -0
  45. heurist/utils/constants.py +1 -0
  46. heurist/utils/rel_to_dict_array.py +8 -0
  47. heurist/validators/__init__.py +3 -0
  48. heurist/validators/detail_validator.py +142 -0
  49. heurist/validators/exceptions.py +34 -0
  50. heurist/validators/parse_heurist_date.py +71 -0
  51. heurist/validators/record_validator.py +156 -0
  52. heurist/workflows/__init__.py +3 -0
  53. heurist/workflows/etl.py +66 -0
  54. heurist_api-0.1.2.dist-info/METADATA +453 -0
  55. heurist_api-0.1.2.dist-info/RECORD +80 -0
  56. heurist_api-0.1.2.dist-info/WHEEL +4 -0
  57. heurist_api-0.1.2.dist-info/entry_points.txt +2 -0
  58. heurist_api-0.1.2.dist-info/licenses/LICENSE +427 -0
  59. mock_data/__init__.py +22 -0
  60. mock_data/blocktext/__init__.py +0 -0
  61. mock_data/blocktext/single.py +7 -0
  62. mock_data/date/__init__.py +0 -0
  63. mock_data/date/compound_repeated.py +44 -0
  64. mock_data/date/compound_single.py +30 -0
  65. mock_data/date/simple_single.py +16 -0
  66. mock_data/date/timestamp_repeated.py +30 -0
  67. mock_data/enum/__init__.py +0 -0
  68. mock_data/enum/repeated.py +29 -0
  69. mock_data/enum/single.py +18 -0
  70. mock_data/file/__init__.py +0 -0
  71. mock_data/file/single.py +28 -0
  72. mock_data/float/__init__.py +0 -0
  73. mock_data/float/single.py +8 -0
  74. mock_data/freetext/__init__.py +0 -0
  75. mock_data/freetext/single.py +16 -0
  76. mock_data/geo/__init__.py +0 -0
  77. mock_data/geo/single.py +22 -0
  78. mock_data/resource/__init__.py +0 -0
  79. mock_data/resource/repeated.py +35 -0
  80. mock_data/resource/single.py +16 -0
heurist/__init__.py ADDED
@@ -0,0 +1 @@
1
# Distribution name of this package.
PACKAGE_NAME: str = "heurist-api"
File without changes
heurist/api/client.py ADDED
@@ -0,0 +1,122 @@
1
+ """Heurist API client"""
2
+
3
+ import json
4
+ from typing import ByteString, Literal
5
+
6
+ import requests
7
+ from heurist.api.constants import MAX_RETRY, READTIMEOUT
8
+ from heurist.api.exceptions import APIException, ReadTimeout
9
+ from heurist.api.url_builder import URLBuilder
10
+ from heurist.api.utils import log_attempt_number
11
+ from tenacity import (
12
+ RetryError,
13
+ retry,
14
+ retry_if_exception_type,
15
+ stop_after_attempt,
16
+ )
17
+
18
+
19
class HeuristAPIClient:
    """Client for the Heurist API.

    Issues GET requests through the ``requests.Session`` it is given and
    converts transport / server failures into ``SystemExit`` carrying a
    descriptive exception.
    """

    def __init__(
        self,
        database_name: str,
        session: requests.Session,
        timeout_seconds: int | None = READTIMEOUT,
    ) -> None:
        """Store the session and build the URL helper for one database.

        Args:
            database_name (str): Name of the Heurist database.
            session (requests.Session): Session used for every request.
            timeout_seconds (int | None): Timeout passed to each GET request.
        """
        self.database_name = database_name
        self.url_builder = URLBuilder(database_name=database_name)
        self.session = session
        self.timeout = timeout_seconds

    @retry(
        retry=retry_if_exception_type(requests.exceptions.ReadTimeout),
        stop=stop_after_attempt(MAX_RETRY),
        after=log_attempt_number,
    )
    def call_heurist_api(self, url: str) -> requests.Response:
        """Send a GET request, retrying on read timeouts.

        Args:
            url (str): Heurist API entry point.

        Returns:
            requests.Response: Response object returned by the session.
        """
        return self.session.get(url, timeout=self.timeout)

    def get_response_content(self, url: str) -> bytes:
        """Request resources from the Heurist server.

        Args:
            url (str): Heurist API entry point.

        Raises:
            SystemExit: If every retry timed out, no response was returned,
                the status code is not 200, or the server reports that it
                cannot connect to the database.

        Returns:
            bytes: Binary response body returned from the Heurist server.
        """
        try:
            response = self.call_heurist_api(url=url)
        except RetryError:
            e = ReadTimeout(url=url, timeout=self.timeout)
            raise SystemExit(e)

        # Compare against None explicitly: a requests.Response is falsy for
        # every 4xx/5xx status, so a truthiness test would report those as
        # "No response." and hide the actual status code.
        if response is None:
            e = APIException("No response.")
            raise SystemExit(e)
        if response.status_code != 200:
            e = APIException(f"Status {response.status_code}")
            raise SystemExit(e)
        # Compare bytes to bytes so that a binary (non UTF-8) payload cannot
        # raise a UnicodeDecodeError here.
        if response.content == b"Cannot connect to database":
            e = APIException("Could not connect to database.")
            raise SystemExit(e)
        return response.content

    def get_records(
        self,
        record_type_id: int,
        form: Literal["xml", "json"] = "json",
        users: tuple[int, ...] = (),
    ) -> bytes | list:
        """Request all records of a certain type and in a certain data format.

        Args:
            record_type_id (int): Heurist ID of targeted record type.
            form (Literal["xml", "json"], optional): Data format for requested
                records. Defaults to "json".
            users (tuple): IDs of users who added the target records. When
                empty, records are not filtered by user.

        Returns:
            bytes | list: If XML, binary response returned from Heurist
                server, else the list of matching JSON records.
        """
        url = self.url_builder.get_records(
            record_type_id=record_type_id, form=form, users=users
        )
        if form != "json":
            return self.get_response_content(url)

        content = self.get_response_content(url)
        all_records = json.loads(content.decode("utf-8"))["heurist"]["records"]
        # Filter out linked records that are not of the target type
        correct_ids = [
            r for r in all_records if r["rec_RecTypeID"] == str(record_type_id)
        ]
        # Filter out records added by non-targeted users
        if users:
            return [r for r in correct_ids if int(r["rec_AddedByUGrpID"]) in users]
        return correct_ids

    def get_structure(self) -> bytes:
        """Request the Heurist database's overall structure in XML format.

        Returns:
            bytes: Binary response returned from Heurist server.
        """
        url = self.url_builder.get_db_structure()
        return self.get_response_content(url)

    def get_relationship_markers(
        self, form: Literal["xml", "json"] = "xml"
    ) -> bytes | list:
        """Request the records whose record type ID is 1.

        Args:
            form (Literal["xml", "json"], optional): Data format for requested
                records. Defaults to "xml".

        Returns:
            bytes | list: Same as ``get_records``.
        """
        return self.get_records(record_type_id=1, form=form)
heurist/api/connection.py ADDED
@@ -0,0 +1,71 @@
1
+ """Heurist API session"""
2
+
3
+ import requests
4
+ from heurist.api.client import HeuristAPIClient
5
+ from heurist.api.constants import READTIMEOUT
6
+ from heurist.api.exceptions import AuthenticationError
7
+ from requests import Session
8
+
9
+
10
class HeuristAPIConnection:
    """Context manager that logs in to Heurist and yields an API client."""

    def __init__(
        self,
        db: str,
        login: str,
        password: str,
        read_timeout: int = READTIMEOUT,
        post_timeout: int = 10,
    ) -> None:
        """
        Session context for a connection to the Heurist server.

        Args:
            db (str): Heurist database name.
            login (str): Username.
            password (str): User's password.
            read_timeout (int): Seconds to wait before raising a ReadTimeout.
            post_timeout (int): Seconds to wait before raising an error when
                establishing a login connection.
        """

        self.db = db
        self.__login = login
        self.__password = password
        self._readtimeout = read_timeout
        self._posttimeout = post_timeout

    def __enter__(self) -> HeuristAPIClient:
        """Open a session, log in, and return a client bound to that session.

        Raises:
            requests.exceptions.ConnectTimeout: If the login request timed
                out while connecting (re-raised after printing a notice).
            SystemExit: Wrapping an AuthenticationError if the login request
                returns a status code other than 200.

        Returns:
            HeuristAPIClient: Client that uses the logged-in session.
        """
        self.session = requests.Session()
        url = "https://heurist.huma-num.fr/heurist/api/login"

        body = {
            "db": self.db,
            "login": self.__login,
            "password": self.__password,
        }
        try:
            try:
                response = self.session.post(
                    url=url, data=body, timeout=self._posttimeout
                )
            except requests.exceptions.ConnectTimeout:
                print(
                    "\nUnable to log in to Heurist Huma-Num server. "
                    "Connection timed out."
                )
                raise
            if response.status_code != 200:
                try:
                    message = response.json()["message"]
                except (ValueError, KeyError, TypeError):
                    # The error body is not the expected JSON object; fall
                    # back to the status code rather than masking the
                    # authentication failure with a parsing error.
                    message = f"Status {response.status_code}"
                e = AuthenticationError(message)
                raise SystemExit(e)
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the session
            # has to be closed here to avoid leaking it.
            self.session.close()
            raise

        return HeuristAPIClient(
            database_name=self.db,
            session=self.session,
            timeout_seconds=self._readtimeout,
        )

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the session; exceptions from the ``with`` body propagate."""
        self.session.close()
heurist/api/constants.py ADDED
@@ -0,0 +1,19 @@
1
+ """Constant text variables for Heurist API."""
2
+
3
+ import os
4
+
5
# Base URL of the Heurist server.
HUMA_NUM_SERVER = "https://heurist.huma-num.fr/heurist"

# Endpoint paths, appended to the server's base URL.
RECORD_XML_EXPORT_PATH = "/export/xml/flathml.php"
RECORD_JSON_EXPORT_PATH = "/hserv/controller/record_output.php"
STRUCTURE_EXPORT_PATH = "/hserv/structure/export/getDBStructureAsXML.php"

# Read timeout: taken from the READTIMEOUT environment variable when it is
# set (environment values are strings, hence the conversion), else 10.
timeout_var = os.environ.get("READTIMEOUT", 10)
timeout_var = int(timeout_var) if isinstance(timeout_var, str) else timeout_var
READTIMEOUT = timeout_var

# Value given to tenacity's stop_after_attempt by the API client.
MAX_RETRY = 3
heurist/api/credentials.py ADDED
@@ -0,0 +1,71 @@
1
+ import os
2
+
3
+ from dotenv import find_dotenv, load_dotenv
4
+ from heurist.api.connection import HeuristAPIConnection
5
+ from heurist.api.exceptions import MissingParameterException
6
+
7
+
8
class CredentialHandler:
    """Keep the Heurist login credentials in environment variables.

    The names of the environment variables are class attributes so that
    every accessor reads and writes the same keys.
    """

    # Path returned by python-dotenv's find_dotenv(), resolved once when the
    # class is created.
    env_file = find_dotenv()
    db_key = "DB_NAME"
    login_key = "DB_LOGIN"
    password_key = "DB_PASSWORD"

    def __init__(
        self,
        database_name: str | None = None,
        login: str | None = None,
        password: str | None = None,
        debugging: bool = False,
    ):
        """Export the given credentials and check that all three are set.

        Args:
            database_name (str | None): Heurist database name.
            login (str | None): Username.
            password (str | None): User's password.
            debugging (bool): When True, the env file is not loaded.

        Raises:
            SystemExit: Wrapping a MissingParameterException if one of the
                three variables is unset or empty after the given values
                have been exported.
        """
        if not debugging:
            load_dotenv(self.env_file)

        params = [
            (self.db_key, database_name),
            (self.login_key, login),
            (self.password_key, password),
        ]

        # Set all the secret variables in the environment
        for key, var in params:
            if var:
                self.set_var(key=key, var=var)
            # Confirm that the environment variable is set and not empty
            if not self.get_var(key=key):
                e = MissingParameterException(parameter=key, env_file=self.env_file)
                raise SystemExit(e)

    def test_connection(self) -> None:
        """Open and immediately close a connection with the stored credentials."""
        with HeuristAPIConnection(
            db=self.get_database(), login=self.get_login(), password=self.get_password()
        ) as _:
            pass

    @classmethod
    def _reset_envvars(cls) -> None:
        """Remove the three credential variables from the environment."""
        for key in (cls.db_key, cls.login_key, cls.password_key):
            # pop() with a default also clears variables set to "" and does
            # not fail when the variable is absent.
            os.environ.pop(key, None)

    @classmethod
    def set_var(cls, key: str, var: str) -> None:
        """Set the environment variable ``key`` to ``var``."""
        os.environ[key] = var

    @classmethod
    def get_var(cls, key: str) -> str | None:
        """Return the environment variable ``key``, or None if it is unset."""
        return os.getenv(key)

    @classmethod
    def get_database(cls) -> str | None:
        """Return the database name stored under ``db_key``."""
        return cls.get_var(key=cls.db_key)

    @classmethod
    def get_login(cls) -> str | None:
        """Return the username stored under ``login_key``."""
        return cls.get_var(key=cls.login_key)

    @classmethod
    def get_password(cls) -> str | None:
        """Return the password stored under ``password_key``."""
        return cls.get_var(key=cls.password_key)
heurist/api/exceptions.py ADDED
@@ -0,0 +1,45 @@
1
+ from .constants import MAX_RETRY
2
+
3
+
4
class APIException(Exception):
    """Raised when a call to the Heurist API does not succeed."""
6
+
7
+
8
class AuthenticationError(Exception):
    """Error raised when unable to authenticate Heurist login."""

    def __init__(self, message):
        """Build the error text, including the explanation that was passed in.

        Args:
            message: Explanation of the failure (e.g. the message returned by
                the server). It is appended to the generic text when it is
                not empty.
        """
        # Include the caller's explanation instead of discarding it.
        details = f"\tServer response: {message}\n" if message else ""
        self.message = (
            "Authentication Error.\n"
            "\tFailed to authenticate Heurist user login.\n"
            f"{details}"
        )
        super().__init__(self.message)
16
+
17
+
18
class MissingParameterException(Exception):
    """Signal that a required parameter could not be found."""

    def __init__(self, parameter: str, env_file: str):
        """Compose the message naming the missing variable and the env file.

        Args:
            parameter (str): Name of the missing variable.
            env_file (str): Path of the env file that was consulted.
        """
        lines = (
            "MissingParameter Exception.",
            f"\tMissing the variable '{parameter}'.",
            f"\tTried looking in the env file '{env_file}'.",
            "",  # keeps the trailing newline of the message
        )
        self.message = "\n".join(lines)
        super().__init__(self.message)
27
+
28
+
29
class ReadTimeout(Exception):
    """Exception raised because the data returned by the Heurist server took
    too long to receive.
    """

    def __init__(self, url: str, timeout: int):
        """Compose the message describing the timed-out request.

        Args:
            url (str): URL that was requested.
            timeout (int): Timeout, in seconds, that was exceeded.
        """
        # Adjacent string literals are used instead of backslash
        # continuations inside one literal, so that words on either side of
        # a source line break are separated by exactly one space.
        self.message = (
            "ReadTimeout Error.\n"
            f"\tOn all {MAX_RETRY} tries, Heurist's server took too long "
            f"(> {timeout} seconds) to send data from the following URL:\n"
            f"{url}\n"
            "Solutions:\n"
            "\t1. Try running the command again and hope the server / your "
            "internet is faster.\n"
            "\t2. Set the READTIMEOUT environment variable immediately before "
            "the command and run it again, i.e. "
            "'READTIMEOUT=20 heurist download'.\n"
        )
        super().__init__(self.message)
heurist/api/url_builder.py ADDED
@@ -0,0 +1,148 @@
1
+ """Class to compose URIs for calling the Heurist API."""
2
+
3
+ from typing import Literal
4
+
5
+ from heurist.api.constants import (
6
+ HUMA_NUM_SERVER,
7
+ RECORD_JSON_EXPORT_PATH,
8
+ RECORD_XML_EXPORT_PATH,
9
+ STRUCTURE_EXPORT_PATH,
10
+ )
11
+
12
# Percent-encoded separators used when composing the query string.
COMMA: str = "%2C"  # ","
COLON: str = "%3A"  # ":"
14
+
15
+
16
class URLBuilder:
    """Compose endpoints for the Heurist API (by default on Huma-Num's server)."""

    def __init__(self, database_name: str, server: str = HUMA_NUM_SERVER) -> None:
        self.database_name = database_name
        self.server = server

    @property
    def db_api(self) -> str:
        """Endpoint that exports the database structure."""
        return "{}{}".format(self.server, STRUCTURE_EXPORT_PATH)

    @property
    def xml_record_api(self) -> str:
        """Endpoint that exports records as XML."""
        return "{}{}".format(self.server, RECORD_XML_EXPORT_PATH)

    @property
    def json_record_api(self) -> str:
        """Endpoint that exports records as JSON."""
        return "{}{}".format(self.server, RECORD_JSON_EXPORT_PATH)

    @classmethod
    def _join_queries(cls, *args) -> str:
        """Chain query fragments with ampersands, skipping ``None`` entries.

        Returns:
            str: Fragment of a path for the URL.
        """
        return "&".join(fragment for fragment in args if fragment is not None)

    @classmethod
    def _join_list_items(cls, *args) -> str:
        """Wrap the given items in brackets, separated by encoded commas.

        ``None`` items are skipped.

        Examples:
            >>> item1 = '{"filter"%3A"value"}'
            >>> item2 = '{"filter"%3A"value"}'
            >>> item3 = '{"filter"%3A"value"}'
            >>> URLBuilder._join_list_items(item1, item2, item3)
            '[{"filter"%3A"value"}%2C{"filter"%3A"value"}%2C{"filter"%3A"value"}]'

        Returns:
            str: Fragment of a path for the URL.
        """
        joined = COMMA.join(item for item in args if item is not None)
        return "[" + joined + "]"

    @classmethod
    def _make_filter_obj(cls, filter: str, value: str | int) -> str:
        """Render one ``{"filter":"value"}`` object with an encoded colon."""
        return f'{{"{filter}"{COLON}"{value}"}}'

    @classmethod
    def _join_comma_separated_values(cls, *args) -> str:
        """Stringify the non-``None`` values and join them with encoded commas."""
        return COMMA.join(str(value) for value in args if value is not None)

    def get_db_structure(self) -> str:
        """
        Build the URL that retrieves the database structure.

        Examples:
            >>> db = "mock_db"
            >>> builder = URLBuilder(db)
            >>> builder.get_db_structure()
            'https://heurist.huma-num.fr/heurist/hserv/structure/export/getDBStructureAsXML.php?db=mock_db'

        Returns:
            str: URL to retrieve the database structure.
        """
        return f"{self.db_api}?db={self.database_name}"

    def get_records(
        self,
        record_type_id: int,
        form: Literal["xml", "json"] = "xml",
        users: tuple = (),
    ) -> str:
        """Compose the URL that retrieves records of one record type.

        Examples:
            >>> db = "mock_db"
            >>> builder = URLBuilder(db)
            >>> builder.get_records(101)
            'https://heurist.huma-num.fr/heurist/export/xml/flathml.php?q=[{"t"%3A"101"}%2C{"sortby"%3A"t"}]&a=1&db=mock_db&depth=all&linkmode=direct_links'

            >>> db = "mock_db"
            >>> builder = URLBuilder(db)
            >>> builder.get_records(102, form="json")
            'https://heurist.huma-num.fr/heurist/hserv/controller/record_output.php?q=[{"t"%3A"102"}%2C{"sortby"%3A"t"}]&a=1&db=mock_db&depth=all&linkmode=direct_links&format=json&defs=0&extended=2'

            >>> db = "mock_db"
            >>> builder = URLBuilder(db)
            >>> builder.get_records(102, users=(2,16,))
            'https://heurist.huma-num.fr/heurist/export/xml/flathml.php?q=[{"t"%3A"102"}%2C{"sortby"%3A"t"}%2C{"addedby"%3A"2%2C16"}]&a=1&db=mock_db&depth=all&linkmode=direct_links'

        Args:
            record_type_id (int): Heurist ID of the record type.
            form (Literal["xml", "json"]): The format of the exported data.
            users (tuple): IDs placed in an "addedby" filter; no such filter
                is added when the tuple is empty.

        Returns:
            str: URL to retrieve records of a certain type.
        """

        # Endpoint and trailing arguments depend on the export format
        if form == "json":
            api, format_args = self.json_record_api, "format=json&defs=0&extended=2"
        else:
            api, format_args = self.xml_record_api, None

        # Filters making up the "q" parameter
        filters = [
            self._make_filter_obj(filter="t", value=record_type_id),
            self._make_filter_obj(filter="sortby", value="t"),
        ]
        if len(users) > 0:
            added_by = self._join_comma_separated_values(*users)
            filters.append(self._make_filter_obj(filter="addedby", value=added_by))
        query = f"?q={self._join_list_items(*filters)}"

        path = self._join_queries(
            query,
            "a=1",
            f"db={self.database_name}",
            "depth=all",
            "linkmode=direct_links",
            format_args,
        )
        return f"{api}{path}"
heurist/api/utils.py ADDED
@@ -0,0 +1,24 @@
1
+ from datetime import datetime
2
+
3
+ from .constants import MAX_RETRY
4
+
5
+
6
def log_attempt_number(retry_state) -> None:
    """Print a timestamped notice with the attempt count of a tenacity retry.

    Args:
        retry_state (tenacity.RetryCallState): Retry result from tenacity.
    """
    attempt = retry_state.attempt_number
    timestamp = datetime.now().strftime("%H:%M:%S")
    print(
        f"[{timestamp}] ReadTimeout error when receiving data from the Heurist API. "
        f"Retrying {attempt} / {MAX_RETRY} times..."
    )
File without changes