pystrm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pystrm-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.3
2
+ Name: pystrm
3
+ Version: 0.1.0
4
+ Summary: Python Stream module for api to kafka topic
5
+ Requires-Dist: appdirs>=1.4.4
6
+ Requires-Dist: attrs>=25.1.0
7
+ Requires-Dist: confluent-kafka~=2.6.0
8
+ Requires-Dist: fastavro>=1.10.0
9
+ Requires-Dist: jsonschema>=4.23.0
10
+ Requires-Dist: multiprocessing-logging>=0.3.4
11
+ Requires-Dist: psycopg2-binary>=2.9.10
12
+ Requires-Dist: pyyaml>=6.0.2
13
+ Requires-Dist: xxhash>=3.5.0
14
+ Requires-Dist: yfinance>=0.2.65
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+
18
+ Project for streaming data from yfinance to a Kafka stream.
pystrm-0.1.0/README.md ADDED
@@ -0,0 +1 @@
1
+ Project for streaming data from yfinance to a Kafka stream.
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "pystrm"
3
+ version = "0.1.0"
4
+ description = "Python Stream module for api to kafka topic"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "appdirs>=1.4.4",
9
+ "attrs>=25.1.0",
10
+ "confluent-kafka~=2.6.0",
11
+ "fastavro>=1.10.0",
12
+ "jsonschema>=4.23.0",
13
+ "multiprocessing-logging>=0.3.4",
14
+ "psycopg2-binary>=2.9.10",
15
+ "pyyaml>=6.0.2",
16
+ "xxhash>=3.5.0",
17
+ "yfinance>=0.2.65",
18
+ ]
19
+
20
+ [project.scripts]
21
+ pystrm = "pystrm.main:main"
22
+
23
+ [tool.uv.build-backend]
24
+ module-name = "pystrm"
25
+ namespace = true
26
+
27
+ [build-system]
28
+ requires = ["uv_build>=0.9.17,<0.10.0"]
29
+ build-backend = "uv_build"
File without changes
@@ -0,0 +1,42 @@
1
import logging
from attrs import define
from confluent_kafka.admin import AdminClient, NewTopic
from pystrm.utils.constants import KAFKA_BROKERS

logger = logging.getLogger(__name__)


@define(kw_only=True)
class KAdmin:
    """Thin wrapper around the Kafka AdminClient for topic management."""

    # NOTE: class-level attribute -> one AdminClient shared by every
    # KAdmin instance, created at import time.
    admin = AdminClient(KAFKA_BROKERS)

    def topic_exists(self, topic: str) -> bool:
        """Return True if *topic* already exists on the cluster.

        Returns False when the metadata request fails (previously the
        code fell through with ``all_topics`` unbound -> NameError).
        """
        try:
            all_topics = self.admin.list_topics()
        except Exception as err:
            logger.error(f"Failed to fetch list of topics. Error : {str(err)}")
            return False  # bug fix: do not continue with an unbound name
        return topic in all_topics.topics.keys()

    def create_topic(self, topic: str, num_part: int = 1, replica: int = 1) -> None:
        """Create *topic* with the given partition/replication counts unless it exists."""
        if not self.topic_exists(topic):
            new_topic = [NewTopic(topic, num_partitions=num_part, replication_factor=replica)]
            try:
                self.admin.create_topics(new_topic)
                logger.info(f"Topic {topic} has been created")
            except Exception as err:
                logger.error(f"Failed to create topic: {topic}. Error : {str(err)}")
        else:
            logger.warning(f"Topic {topic} already exists")

    def delete_topic(self, topics: list[str]) -> None:
        """Delete every topic in *topics* that actually exists.

        Bug fix: the original condition was inverted
        (``if not self.topic_exists(topic)``), so it attempted to delete
        topics that did NOT exist and skipped the ones that did.
        """
        for topic in topics:
            if self.topic_exists(topic):
                try:
                    self.admin.delete_topics([topic])
                    logger.warning(f"'{topic}' topic deleted")
                except Exception as err:
                    logger.error(f"Failed to delete topic: {topic}. Error : {str(err)}")
            else:
                logger.info(f"Topic name {topic} does not exists")
@@ -0,0 +1,78 @@
1
import logging
from functools import lru_cache
from attrs import define, field
from json import dumps

from confluent_kafka import Consumer
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import SerializationContext, MessageField
from pystrm.kmain.kSchemaRegistry import KSR

# Bug fix: was "from src.pystrm.utils.constants import ..." which only
# resolves when running from the repository root, not from the installed
# package (every sibling module imports from "pystrm.").
from pystrm.utils.constants import KAFKA_BROKERS
from pystrm.utils.logger.logDecor import logtimer
from pystrm.utils.confs import get_clientSchema


logger = logging.getLogger(__name__)


@lru_cache
def schemaClient(topic: str, schema_type: str) -> KSR:
    """Build (and memoise per topic/type) a KSR schema-registry wrapper."""
    schema_str = get_clientSchema(topic, schema_type=schema_type)
    schema_client = KSR(topic=topic, schema_str=dumps(schema_str), schema_type=schema_type)

    return schema_client


@define(kw_only=True)
class kConsume:
    """Kafka consumer helper bound to a single topic."""

    topic: str = field(eq=str)
    groupId: str = field(eq=str, default=None)      # required by Kafka; None fails at connect
    schema_type: str = field(eq=str, default=None)  # only needed for serialized consumption

    @logtimer
    def consume_message(self):
        """Consume raw messages from ``self.topic`` and log them until interrupted."""
        consumer = Consumer({"bootstrap.servers": KAFKA_BROKERS, "group.id": self.groupId})
        consumer.subscribe([self.topic])

        try:
            while True:
                msg = consumer.poll(0.5)
                if msg is None:
                    continue
                if msg.error():
                    # typo fix: "recieve" -> "receive"
                    logger.error("Failed to receive message. Error: %s" % (str(msg.error())))
                    continue
                logger.info("Message Consumed: %s" % (str(msg.value())))
        except KeyboardInterrupt:
            logger.warning("Keyboard Interrupt happened")
        finally:
            consumer.close()

    @logtimer
    def consume_serialized_message(self):
        """Consume Avro-serialised messages and log each deserialised payload."""
        consumer = Consumer({"bootstrap.servers": KAFKA_BROKERS, "group.id": self.groupId})
        consumer.subscribe([self.topic])
        schema_client = schemaClient(self.topic, self.schema_type)
        schema_str = schema_client.get_schema_str()
        # NOTE(review): AvroDeserializer normally takes a SchemaRegistryClient
        # as its first argument; a KSR wrapper is passed here — confirm this
        # is intentional before relying on it.
        value_deserializer = AvroDeserializer(schema_client, schema_str)

        try:
            while True:
                msg = consumer.poll(0.5)
                if msg is None:
                    continue
                if msg.error():
                    logger.error("Failed to receive message. Error: %s" % (str(msg.error())))
                    continue
                message = msg.value()
                deserialized_message = value_deserializer(message, SerializationContext(self.topic, MessageField.VALUE))
                logger.info(f"Message Consumed: {deserialized_message}")
        except KeyboardInterrupt:
            logger.warning("Keyboard Interrupt happened")
        finally:
            consumer.close()
@@ -0,0 +1,98 @@
1
import logging
from functools import lru_cache
from attrs import define, field
from json import dumps
from uuid import uuid4
from jsonschema import validate as jsonValidate, ValidationError as jsonValidationError
from fastavro import validate as avroValidate
from fastavro.validation import ValidationError as avroValidationError

from confluent_kafka import Producer
from confluent_kafka.serialization import (
    IntegerSerializer,
    StringSerializer,
    SerializationContext,
    MessageField,
)
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka.schema_registry.json_schema import JSONSerializer
from numpy import int64

from pystrm.utils.constants import KAFKA_BROKERS, KAFKA_SCHEMA_CLIENT
from pystrm.utils.logger.logDecor import logtimer

logger = logging.getLogger(__name__)


@lru_cache
def get_producer():
    """Process-wide singleton Kafka producer."""
    return Producer(KAFKA_BROKERS)


@lru_cache
def get_ksr():
    """Process-wide singleton schema-registry client."""
    return SchemaRegistryClient(KAFKA_SCHEMA_CLIENT)


@define(kw_only=True)
class Kprod:
    """Kafka producer helper bound to a single topic."""

    topic: str = field(eq=str)

    @logtimer
    def acked(self, err, msg) -> None:
        """Delivery callback: log success or failure for each produced message."""
        if err is not None:
            logger.error("Failed to deliver message: %s: %s" % (str(msg), str(err)))
        else:
            logger.info("Message produced: %s" % (str(msg)))

        return None

    @logtimer
    def prodDataWithJsonSerial(self, data: str, mykey: int | str | None) -> None:
        """Produce *data* as plain JSON bytes with no schema validation."""
        producer = get_producer()

        try:
            producer.produce(self.topic, key=mykey, value=dumps(data).encode(), on_delivery=self.acked)

            producer.flush()
        except Exception as err:
            logger.error(f"Error sending message: {str(err)}")
        return None

    @logtimer
    def prodDataWithSerialSchema(self, schema, data: dict[str, int64 | str], mykey: int | str, schema_type: str = "JSON") -> None:
        """Validate *data* against *schema*, then produce it serialised.

        Args:
            schema: JSON-schema dict ("JSON") or parsed Avro schema ("AVRO").
            data: record to publish.
            mykey: message key (int or str) — picks the key serializer.
            schema_type: "JSON" or "AVRO".
        """
        producer = get_producer()
        schema_client = get_ksr()

        try:
            jsonValidate(data, schema) if schema_type == "JSON" else avroValidate(data, schema)
            logger.info("Schema Validated")

            key_serializer = IntegerSerializer() if isinstance(mykey, int) else StringSerializer()
            value_serializer = JSONSerializer(dumps(schema), schema_client) if schema_type == "JSON" else AvroSerializer(schema_client, dumps(schema))

            producer.produce(
                topic=self.topic,
                key=key_serializer(mykey),
                value=value_serializer(data, SerializationContext(self.topic, MessageField.VALUE)),
                # Bug fix: the header value is a UUID *string*; serialising it
                # with key_serializer failed whenever the key was an int
                # (IntegerSerializer cannot encode a str).
                headers={"correlation_id": StringSerializer()(str(uuid4()))},
                on_delivery=self.acked
            )

            producer.flush()
        except jsonValidationError as err:
            logger.error(f"Json Validation Error: {str(err)}")
            return None
        except avroValidationError as err:
            logger.error(f"Avro Validation Error: {str(err)}")
            return None
        except Exception as err:
            logger.error(f"Error sending message: {str(err)}")
            return None
98
+
@@ -0,0 +1,46 @@
1
import logging
from attrs import define, field
from confluent_kafka.schema_registry import SchemaRegistryClient, Schema
from confluent_kafka.schema_registry.error import SchemaRegistryError
from pystrm.utils.constants import KAFKA_SCHEMA_CLIENT

logger = logging.getLogger(__name__)


@define(kw_only=True)
class KSR:
    """Helper around the Confluent schema registry for one topic/subject."""

    topic: str = field(eq=str)
    schema_str: str = field(eq=str)
    schema_type: str = field(eq=str, default="AVRO")
    # Class-level attribute: one registry client shared by all instances.
    schema_client = SchemaRegistryClient(KAFKA_SCHEMA_CLIENT)

    def get_schema_version(self):
        """Return the registered schema id for the subject, or False when absent."""
        try:
            schema_version = self.schema_client.get_latest_version(self.topic)
            logger.info(f"{self.topic} schema does exists.")
            return schema_version.schema_id
        except SchemaRegistryError as err:
            logger.warning("Schema does not exists : " + str(err))
            return False

    def get_schema_str(self):
        """Fetch the registered schema text, or None when unavailable.

        Bug fix: the original passed ``False`` straight into
        ``get_schema`` when the subject was not registered.
        """
        schema_id = self.get_schema_version()
        if not schema_id:
            logger.warning(f"No registered schema found for '{self.topic}'")
            return None
        try:
            schema = self.schema_client.get_schema(schema_id)
            return schema.schema_str
        except SchemaRegistryError as err:
            logger.warning(f"Some error occured while fetching schema_id '{schema_id}': " + str(err))

    def register_schema(self):
        """Register ``schema_str`` for the topic unless one is already present."""
        if not self.get_schema_version():
            try:
                schema = Schema(self.schema_str, self.schema_type)
                self.schema_client.register_schema(self.topic, schema)
                logger.info("Schema Registered")
            except SchemaRegistryError as err:
                logger.warning("Schema registry failed. Error: " + str(err))
        else:
            logger.warning(f"{self.topic} already registered")
@@ -0,0 +1,68 @@
1
import logging
import logging.config
import sys
from pprint import pformat
from time import sleep
from typing import Any
from datetime import datetime
from pathlib import Path
from pystrm.utils.mainCalls.yfUtils import getLiveTickData
from pystrm.utils.confs import fetch_conf, fetch_prop
from pystrm.utils.constants import RUN_ID
from pystrm.utils.logger.logDecor import inOutLog
from multiprocessing_logging import install_mp_handler


@inOutLog
def main() -> None:
    """CLI entry point: ``pystrm <operation> <Section.Prop>``.

    Sets up file/JSON logging, validates both CLI arguments against the
    dispatch table and tables.yml, then runs the selected operation.
    """
    Path(Path.cwd()/'logs').mkdir(exist_ok=True)

    config: dict[str, Any] = fetch_conf()['Logging']
    # Daily log file: append today's date to the configured base filename.
    config['handlers']['file_json']['filename'] += datetime.now().strftime('%Y-%m-%d.json')

    logging.config.dictConfig(config)
    logger = logging.getLogger()

    install_mp_handler(logger)

    logger.info("Initiating program with run_id : " + str(RUN_ID))

    __method_to_excute = {
        "liveYfinanaceTick": getLiveTickData
    }

    # Bug fix: ensure the KeyError handler below never hits an unbound name.
    msg = ""
    try:
        if len(sys.argv) != 3:
            raise TypeError

        mthd = sys.argv[1].strip()
        conf_key = sys.argv[2].strip()

        if mthd not in __method_to_excute.keys():
            msg = "List of operation mentioned in dictionary for this package"
            raise KeyError
        logger.info(f"Operation {mthd} exists. Validating other input")

        # Bug fix: a key without a '.' previously crashed with IndexError.
        section, _, prop = conf_key.partition('.')
        if not prop or section not in fetch_prop().keys() or prop not in fetch_prop()[section]:
            msg = f"key:{section} and value: {prop} pair does not exists in tables.yml"
            raise KeyError
        logger.info(f"Configuration found in tables.yml for {conf_key}")
        logger.info("Config found for this operation from tables.yml are as below:")
        logger.info(f"\n{pformat(fetch_prop()[section], indent=4)}")

        return __method_to_excute[mthd](conf_key)
    except TypeError:
        logger.critical(f"main() function takes exactly 2 arguments ({len(sys.argv[1:])} given)")
        sleep(1)  # give the async log handlers a moment to flush
        sys.exit(1)
    except KeyError:
        logger.critical(f'Key not found: {msg}')
        sleep(1)
        sys.exit(1)


if __name__ == '__main__':
    main()
File without changes
@@ -0,0 +1,249 @@
1
+ import logging
2
+ import asyncio
3
+ from operator import itemgetter
4
+ from multiprocessing import Pool
5
+ from time import sleep
6
+ from typing import Any
7
+ import os
8
+
9
+ from numpy import int64
10
+
11
+ from pystrm.kmain.kProducer import Kprod
12
+ # from pystrm.schema.yfSchema.ticksSchemaObj import fastInfoSchemaObject
13
+
14
+ from pystrm.utils.confs import fetch_conf, get_clientSchema, streamConf
15
+ from pystrm.utils.kUtils import createKtopic, registerClientSchema
16
+ from pystrm.utils.logger.logDecor import logtimer, inOutLog
17
+
18
+
19
+ import appdirs as ad
20
+ ad.user_cache_dir = lambda *args: "/tmp"
21
+ import yfinance as yf # noqa: E402
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ def dataDupCheck(data_dct: dict[str, Any], typ: str, data: dict[str, int64 | str], symb: str) -> bool:
27
+
28
+ if (typ + "." + symb) in data_dct.keys():
29
+ if data_dct[typ + "." + symb] == data:
30
+ return False
31
+ else:
32
+ data_dct[typ + "." + symb] = data
33
+ return True
34
+ else:
35
+ data_dct[typ + "." + symb] = data
36
+ return True
37
+
38
+
39
async def validSend(kobj: Kprod, typ: str, data_dct: dict[str, Any], symb: str, data: dict[str, int64 | str], schema_type: str, schema: object) -> None:
    """Publish *data* for *symb* only when it differs from the last payload seen."""
    is_new = dataDupCheck(data_dct=data_dct, typ=typ, data=data, symb=symb)
    if not is_new:
        logger.info(f"Duplicate record found for symbol {symb}")
        return
    logger.info(f"New record found for symbol {symb}")
    kobj.prodDataWithSerialSchema(schema=schema, data=data, mykey=symb, schema_type=schema_type)
45
+
46
+
47
@logtimer
async def asyncTicker(kobj: Kprod, symb: str, meth: str):
    # Fetch the attribute named *meth* from the NSE ticker "<symb>.NS" and
    # publish it as plain JSON keyed by the symbol.
    # NOTE(review): despite being async, the yfinance call below is blocking.
    data = yf.Ticker(symb+".NS").__getattribute__(meth)
    kobj.prodDataWithJsonSerial(data=data, mykey=symb)
51
+
52
+
53
@inOutLog
async def ticker(kobj: Kprod, symbol: list[str], param_dct: dict[str, Any]) -> None:
    """Round-robin over *symbol*, publishing ticker data for each one.

    Loops forever when param_dct['type'] == 'Streaming'; otherwise does a
    single fetch, waits 5s and stops.

    Args:
        kobj: producer bound to the target topic.
        symbol (list[str]): symbols for which ticker data will be generated.
        param_dct: config; uses 'prop_key' (yfinance attribute name) and 'type'.
    """

    indx: int = 0

    if len(symbol) == 0:
        logger.info("Symbol list is of zero size")
        return

    while True:
        indx = indx % len(symbol)

        # Throttle once per full pass over the symbol list.
        if indx == 0:
            sleep(1)  # NOTE(review): blocking sleep inside a coroutine

        try:
            await asyncTicker(kobj, symbol[indx], param_dct['prop_key'])
        except KeyboardInterrupt:
            logger.error("KeyboardInterrrupt happened")
            break
        except Exception as e:
            logger.error(str(e))

        # Non-streaming mode: stop after a single fetch.
        if param_dct['type'] != 'Streaming':
            sleep(5)
            break

        indx += 1

    return None
88
+
89
+
90
@logtimer
async def asyncFastInfo(symb: str, meth: list[str]) -> object:
    """Fetch the fast_info fields named in *meth* for NSE symbol *symb*.

    Returns a dict mapping each lower-cased field name to its value.
    """
    values = itemgetter(*meth)(yf.Ticker(symb + ".NS").fast_info)
    # Bug fix: with a single requested field, itemgetter returns the bare
    # value rather than a tuple, which broke the zip below.
    if len(meth) == 1:
        values = (values,)
    data = dict(zip([item.lower() for item in meth], list(values)))
    return data
95
+
96
+
97
@inOutLog
async def fastInfo(kobj: Kprod, symbol: list[str], param_dct: dict[str, Any]) -> None:
    """Round-robin over *symbol*, publishing de-duplicated fast_info records.

    Args:
        kobj: producer bound to the target topic.
        symbol (list[str]): symbols for which fast_info data will be generated.
        param_dct: config; uses 'prop_key', 'schema_type', 'infolst' and 'type'.
    """

    indx: int = 0

    if len(symbol) == 0:
        logger.info("Symbol list is of zero size")
        return

    schema = get_clientSchema(param_dct['prop_key'], param_dct['schema_type'])

    # Last payload sent per "fastInfo.<symbol>", used to drop duplicates.
    dupCheck = dict()

    while True:
        print(dupCheck)  # NOTE(review): debug print left in; consider logger.debug
        indx = indx % len(symbol)

        # Throttle once per full pass over the symbol list.
        if indx == 0:
            sleep(1)  # NOTE(review): blocking sleep inside a coroutine

        try:
            data = await asyncFastInfo(symbol[indx], param_dct['infolst'])
            await validSend(kobj, "fastInfo", dupCheck, symbol[indx], data, param_dct['schema_type'], schema)
        except KeyboardInterrupt:
            logger.warning("KeyboardInterrrupt happened")
        except Exception as e:
            logger.error(str(e))

        # Non-streaming mode: stop after a single fetch.
        if param_dct['type'] != 'Streaming':
            sleep(5)
            break

        indx += 1

    return None
137
+
138
+
139
+ # @inOutLog
140
+ # @logtimer
141
+ # async def multiTicker(kobj: Kprod, symbol: list[str], param_dct: dict[str, Any]) -> None:
142
+ # """_summary_
143
+
144
+ # Args:
145
+ # symbol (str): symbol for which ticker data will be generated
146
+
147
+ # Returns:
148
+ # dict[str, Any]: Fetch ticker data
149
+ # """
150
+
151
+ # # schema_client = SchemaRegistryClient(KAFKA_SCHEMA_CLIENT)
152
+
153
+ # indx: int = 0
154
+
155
+ # if len(symbol) == 0:
156
+ # logger.info("Symbol list is of zero size")
157
+ # return
158
+
159
+ # while True:
160
+ # indx = indx % len(symbol)
161
+ # try:
162
+ # data = await asyncFastInfo(symbol[indx], param_dct['infolst'])
163
+ # await kobj.prodDataWithAvroSerialSchema(value=data, key=symbol[indx])
164
+ # sleep(1)
165
+ # except KeyboardInterrupt:
166
+ # logger.warning("KeyboardInterrrupt happened")
167
+ # break
168
+ # except Exception as e:
169
+ # logger.error(str(e))
170
+
171
+ # if param_dct['type'] != 'Streaming':
172
+ # sleep(5)
173
+ # break
174
+
175
+ # indx += 1
176
+ # return None
177
+
178
+
179
@inOutLog
@logtimer
def procHandler(param_dct: dict[str, Any], symb: list[str]) -> None:
    """Worker entry point: run the configured async fetch loop for *symb*.

    Executed inside a multiprocessing worker; builds its own producer and
    dispatches on param_dct['ldt'] ("tick" or "fastinfo").

    Args:
        param_dct (dict[str, str]): parameter dictionary for execution.
        symb (list[str]): stock symbols this worker is responsible for.
    """

    kobj = Kprod(topic=param_dct['prop_key'])

    # Dispatch table from operation name to coroutine function.
    __EXECUTE_METHOD = {
        "tick": ticker
        ,"fastinfo": fastInfo
    }

    asyncio.run(__EXECUTE_METHOD[param_dct['ldt']](kobj, symb, param_dct))
    return None
200
+
201
+
202
def process_status(err):
    """Pool error callback: log whether the worker batch failed or succeeded."""
    if err is None:
        logger.info("Process Success")
    else:
        logger.error("Failed process: %s" % (str(err)))
207
+
208
+
209
@inOutLog
def getStreamData(key: str) -> None:
    """Fan out streaming fetch work for config *key* across processes.

    Creates the Kafka topic (and registers the client schema when one is
    configured), splits the symbol list into per-worker chunks, and runs
    procHandler for each chunk in a multiprocessing pool. The pool is given
    the configured market 'LiveTime' window to finish.

    Args:
        key (str): config key ("Section.Prop") used to look up stream settings.
    """

    num_proc, num_process, symbols, prop_dict = streamConf(key)

    createKtopic(topic=prop_dict['prop_key'], num_part=9, replica=3)

    if 'schema_type' in prop_dict.keys():
        registerClientSchema(topic=prop_dict['prop_key'], schema_type=prop_dict['schema_type'])

    try:
        pool = Pool(processes=os.cpu_count())

        # Chunk the symbol list: num_process symbols per worker, with the
        # final worker taking any remainder.
        input_list = list()
        for i in range(1, num_proc+1):
            if i == num_proc:
                input_list.append((prop_dict, symbols[(i - 1)*num_process:]))
            else:
                input_list.append((prop_dict, symbols[(i-1)*num_process:i*num_process]))

        ar = pool.starmap_async(procHandler, input_list, error_callback=process_status)
        # Wait at most the configured live-market window for all workers.
        ar.wait(timeout=fetch_conf()['Market']['LiveTime'])
        if ar.ready():
            logger.info("All task finished")
        else:
            logger.error("Some task still running")
            # Kill workers still running past the deadline.
            pool.terminate()
        return None
    except KeyboardInterrupt as e:
        logger.warning("Keyboard Interrupt : " + str(e))
        pool.terminate()
        return None
    except Exception as e:
        logger.error(str(e))
        pool.terminate()
        return None
File without changes
@@ -0,0 +1,94 @@
1
+ from typing import Any
2
+ from yaml import safe_load
3
+ from fastavro.schema import load_schema
4
+ import json
5
+ #import requests
6
+ #from utils import BASE_URL, HEADERS, NIFTY50
7
+
8
+
9
def fetch_prop() -> dict[str, Any]:
    """Load and return the table/property config from ../config/tables.yaml."""
    # NOTE(review): relative path — only resolves when CWD is the package dir.
    with open('../config/tables.yaml', 'r') as tblinfo:
        return safe_load(tblinfo.read())
15
+
16
+
17
def fetch_conf() -> dict[str, Any]:
    """Load and return the application config from ../config/config.yaml."""
    # NOTE(review): relative path — only resolves when CWD is the package dir.
    with open('../config/config.yaml', 'r') as cfginfo:
        return safe_load(cfginfo.read())
23
+
24
+
25
def param_init(key: str) -> Any:
    """Split "Section.Prop" *key* and return (section, prop, section_conf, prop_conf)."""
    parts = key.split('.')
    conf_key = parts[0]
    prop_key = parts[1]

    conf_dict = fetch_conf()[conf_key]
    prop_dict = fetch_prop()[conf_key][prop_key]

    return conf_key, prop_key, conf_dict, prop_dict
33
+
34
+
35
def fetch_db_dtl(db_type: str) -> dict[str, str | int]:
    """Return the connection-settings block for *db_type* from config.yaml."""
    return fetch_conf()[db_type]
38
+
39
+
40
def get_clientSchema(fl_nm: str, schema_type: str):
    """Load the local client schema file for *fl_nm*.

    "JSON" loads ``<path><fl_nm>.json`` via the json module; anything else
    loads ``<path><fl_nm>.avsc`` via fastavro.
    """
    path = fetch_conf()["Kafka"]["schema"][schema_type.lower()]

    if schema_type == "JSON":
        with open(path + fl_nm + ".json") as fl:
            return json.load(fl)
    return load_schema(path + fl_nm + ".avsc")
53
+
54
def streamConf(key: str) -> tuple[int, int, list[str], dict[str, Any]]:
    """Derive worker counts and the symbol list for streaming config *key*.

    Returns (num_proc, num_process, symbols, prop_dict) where num_process is
    the chunk size per worker and num_proc is the worker count, bumped by one
    when the symbols do not divide evenly.
    """
    conf_key, prop_key, conf_dict, prop_dict = param_init(key)
    symbols: list[str] = fetch_conf()['Market']['Symbols']

    prop_dict['prop_key'] = prop_key.lower()

    num_proc: int = int(conf_dict['num_process'])
    num_process: int = len(symbols) // num_proc

    if len(symbols) % num_proc:
        num_proc += 1

    return (num_proc, num_process, symbols, prop_dict)
71
+
72
+ '''@logtimer
73
+ @staticmethod
74
+ def niftySymbols(url: str) -> list[str]:
75
+
76
+ session = requests.Session()
77
+ r = session.get(BASE_URL, headers=HEADERS, timeout=5)
78
+ cookies = dict(r.cookies)
79
+
80
+ response = session.get(url, timeout=5, headers=HEADERS, cookies=cookies)
81
+ content = response.content.decode('utf-8')
82
+
83
+ columns=['Company Name', 'Industry', 'Symbol', 'Series', 'ISIN Code']
84
+ data_lst = [x.strip().split(',') for x in content.splitlines() if x.strip().split(',') != columns]
85
+
86
+ df=pd.DataFrame(data_lst, columns=columns)
87
+ symbols_lst = df['Symbol'].tolist()
88
+
89
+ return symbols_lst'''
90
+
91
+
92
+ #print(niftySymbols(NIFTY50))
93
+ #print(param_init('JugaadData.PriceInfo'))
94
+ #print(fetch_conf())
@@ -0,0 +1,17 @@
1
from typing import Final
from datetime import datetime
from pystrm.utils.confs import fetch_conf
from socket import gethostname
from xxhash import xxh32_intdigest


# Unique per-run id: YYYYMMDD concatenated with a 32-bit hash of
# "pystream" + the current time (so two runs on the same day differ).
RUN_ID: Final[int] = int(datetime.now().strftime('%Y%m%d') + str(xxh32_intdigest("pystream" + datetime.now().strftime('%H:%M:%S.%f'))))

CURRENT_DATE: Final[str] = datetime.today().strftime('%Y-%m-%d')

# Broker config merged with this host's name as the Kafka client id.
KAFKA_BROKERS: Final[dict[str, str]] = fetch_conf()['Kafka']['kafka-broker-conf'] | {'client.id': gethostname()}

KAFKA_SCHEMA_CLIENT: Final[dict[str, str]] = fetch_conf()['Kafka']['kafka-schema-client-conf']


# Bug fix: RUN_ID was omitted from __all__ although other modules
# (main, dbLogger, logJSONFormatter) import it from here.
__all__ = ('CURRENT_DATE', 'KAFKA_BROKERS', 'KAFKA_SCHEMA_CLIENT', 'RUN_ID')
@@ -0,0 +1,24 @@
1
+ from json import dumps
2
+ from pystrm.kmain.kAdminstrator import KAdmin
3
+ from pystrm.kmain.kSchemaRegistry import KSR
4
+ from pystrm.utils.confs import get_clientSchema
5
+
6
+
7
def createKtopic(topic: str, num_part: int = 1, replica: int = 1) -> None:
    """Create *topic* via KAdmin (no-op if it already exists)."""
    KAdmin().create_topic(topic=topic, num_part=num_part, replica=replica)
    return None
13
+
14
+
15
def registerClientSchema(topic: str, schema_type: str) -> None:
    """Load the local client schema for *topic* and register it with the registry."""
    schema_str = dumps(get_clientSchema(topic, schema_type=schema_type))
    KSR(topic=topic, schema_str=schema_str, schema_type=schema_type).register_schema()
    return None
22
+
23
def to_dict(obj: object, ctx):
    """Serializer helper: shallow-copy *obj*'s instance attributes into a dict."""
    return {**vars(obj)}
@@ -0,0 +1,38 @@
1
from logging.config import ConvertingList
from logging.handlers import QueueHandler, QueueListener
from queue import Queue
from atexit import register


def _resolve_handlers(lst: ConvertingList):
    """Force a logging-config ConvertingList into a plain list of handlers."""
    if not isinstance(lst, ConvertingList):
        return lst

    # Indexing the list performs the evaluation.
    return [lst[i] for i in range(len(lst))]


class QueueListenerHandler(QueueHandler):
    """QueueHandler that owns and runs its own QueueListener.

    Args:
        handlers: handlers the listener forwards records to.
        respect_handler_level: passed through to QueueListener.
        auto_run: start the listener now and stop it at interpreter exit.
        queue: optional explicit queue; a fresh unbounded Queue by default.
    """

    def __init__(self, handlers, respect_handler_level=True, auto_run=True, queue=None):
        # Bug fix: the default used to be the mutable ``Queue(-1)`` evaluated
        # once at definition time, so every instance silently shared a single
        # queue. A fresh queue is now created per instance.
        super().__init__(queue if queue is not None else Queue(-1))
        handlers = _resolve_handlers(handlers)
        self._listener = QueueListener(
            self.queue,
            *handlers,
            respect_handler_level=respect_handler_level)
        if auto_run:
            self.start()
            register(self.stop)

    def start(self):
        self._listener.start()

    def stop(self):
        self._listener.stop()

    def emit(self, record):
        return super().emit(record)
File without changes
@@ -0,0 +1,80 @@
1
import datetime as dt
import json
import logging
from pystrm.utils.constants import RUN_ID

# Attributes present on every LogRecord; anything NOT in this set was
# passed by the caller via ``extra=`` and is copied into the JSON payload.
LOG_RECORD_BUILTIN_ATTRS = {
    "args",
    "asctime",
    "created",
    "exc_info",
    "exc_text",
    "filename",
    "funcName",
    "levelname",
    "levelno",
    "lineno",
    "module",
    "msecs",
    "message",
    "msg",
    "name",
    "pathname",
    "process",
    "processName",
    "relativeCreated",
    "stack_info",
    "thread",
    "threadName",
    "taskName",
}


class logJSONFormatter(logging.Formatter):
    """Formatter that renders each log record as one JSON object.

    ``fmt_keys`` maps output-key -> record-attribute name; attributes that
    match an always-present field (run_id, jobtype, message, timestamp,
    exc_info, stack_info) are taken from there, everything else is read via
    ``getattr(record, ...)``.
    """

    def __init__(
        self,
        *,
        fmt_keys: dict[str, str] | None = None,
    ):
        super().__init__()
        self.fmt_keys = fmt_keys if fmt_keys is not None else {}

    def format(self, record: logging.LogRecord) -> str:
        """Return the record serialised as a JSON string."""
        message = self._prepare_log_dict(record)
        # default=str keeps non-JSON-serialisable extras from raising.
        return json.dumps(message, default=str)

    def _prepare_log_dict(self, record: logging.LogRecord):
        """Build the dict that ``format`` serialises."""
        # Fields included on every log line.
        always_fields = {
            "run_id": RUN_ID,
            "jobtype": "pyStream",
            "message": record.getMessage(),
            # NOTE(review): naive local-time timestamp (tz=None) — confirm
            # downstream consumers expect local time rather than UTC.
            "timestamp": dt.datetime.fromtimestamp(
                record.created, tz=None
            ).isoformat(),
        }
        if record.exc_info is not None:
            always_fields["exc_info"] = self.formatException(record.exc_info)

        if record.stack_info is not None:
            always_fields["stack_info"] = self.formatStack(record.stack_info)

        # Caller-chosen keys first; each resolves to an always-field (popped
        # so it is not duplicated below) or to a raw record attribute.
        message = {
            key: msg_val
            if (msg_val := always_fields.pop(val, None)) is not None
            else getattr(record, val)
            for key, val in self.fmt_keys.items()
        }
        message.update(always_fields)

        # Copy user-supplied extras verbatim.
        for key, val in record.__dict__.items():
            if key not in LOG_RECORD_BUILTIN_ATTRS:
                message[key] = val

        return message
76
+
77
+ class NonErrorFilter(logging.Filter):
78
+
79
+ def filter(self, record: logging.LogRecord) -> bool | logging.LogRecord:
80
+ return record.levelno <= logging.INFO
@@ -0,0 +1,33 @@
1
import logging
import inspect
from time import perf_counter
from typing import Any, Callable
from functools import wraps, partial

logger = logging.getLogger(__name__)


def logtimer(func: Callable[..., Any]) -> Callable[..., Any]:
    """Decorator: log the wall-clock duration of every call to *func*."""
    @wraps(func)
    def timed(*args: Any, **kwargs: Any):
        began = perf_counter()
        outcome = func(*args, **kwargs)
        logger.info(f"Execution of {func.__name__} took: {(perf_counter() - began):.6f} sec")
        return outcome
    return timed
18
+
19
+
20
def funcLifeLog(func: Callable[..., Any], logger: logging.Logger) -> Callable[..., Any]:
    """Decorator factory: log entry and exit of *func* via *logger*.

    Also prints the fully-qualified call with its bound arguments.
    """
    @wraps(func)
    def traced(*args: Any, **kwargs: Any):
        logger.info(f"Calling for execution of {func.__name__}")
        bound = inspect.signature(func).bind(*args, **kwargs).arguments
        arg_repr = ", ".join(map("{0[0]} = {0[1]!r}".format, bound.items()))
        print(f"{func.__module__}.{func.__qualname__} ( {arg_repr} )")
        outcome = func(*args, **kwargs)
        logger.info(f"Finished execution of {func.__name__}")
        return outcome
    return traced
31
+
32
+
33
+ inOutLog = partial(funcLifeLog, logger=logger)
@@ -0,0 +1,61 @@
1
import logging
import psycopg2 as psg
from pystrm.utils.constants import RUN_ID
import datetime as dt
from pystrm.utils.confs import fetch_db_dtl


postgres_dtl = fetch_db_dtl('Postgresql')

# Module-level connection opened at import time. NOTE(review): when the
# connect fails, `connection`/`cursor` stay unbound and the functions below
# raise NameError — confirm this best-effort behaviour is intended.
try:
    connection = psg.connect(host=postgres_dtl['hostname'], dbname=postgres_dtl['db'], user=postgres_dtl['username'], password=postgres_dtl['password'], port=postgres_dtl['port'])
except (Exception, psg.DatabaseError) as e:
    print("RDBMS connection failed to database : " + str(e))
else:
    cursor = connection.cursor()


def execute_query(qry: str, params=None) -> None:
    """Execute *qry* (optionally with bound *params*) and commit.

    *params* is a new, backward-compatible argument enabling server-side
    parameter binding instead of string-built SQL.
    """
    try:
        cursor.execute(qry, params)
        connection.commit()
    except Exception as e:
        print(f'Error {e}')
        print('Anything else that you feel is useful')
        connection.rollback()


def close_connection():
    """Close the module-level cursor and connection."""
    cursor.close()
    connection.close()


# DDL for the log table; created once at import time.
log_tbl: str = f"""CREATE TABLE IF NOT EXISTS {postgres_dtl['schema']}.{postgres_dtl['table']} (
    run_id BIGINT NOT NULL,
    job_type VARCHAR(200) NOT NULL,
    level_name VARCHAR(200) NOT NULL,
    message TEXT NOT NULL,
    logger VARCHAR(200) NOT NULL,
    module VARCHAR(200) NOT NULL,
    function_name VARCHAR(200) NOT NULL,
    filename VARCHAR(200) NOT NULL,
    line_num VARCHAR(200) NOT NULL,
    log_time VARCHAR(200) NOT NULL,
    PRIMARY KEY(run_id, log_time)
    );"""

execute_query(log_tbl)


class CustomHandler(logging.StreamHandler):
    """Logging handler that mirrors every record into the Postgres log table."""

    def __init__(self):
        super().__init__()

    def emit(self, record: logging.LogRecord) -> None:
        if record:
            msg = record.getMessage()
            if msg.startswith('"') and msg.endswith('"'):
                msg = msg[1:-1]
            # Security fix: values are now bound server-side instead of being
            # f-string interpolated into the SQL (log messages carry
            # attacker-influenced text and could previously break out of the
            # quotes). The old `'` -> backtick mangling is no longer needed.
            execute_query(
                f"INSERT INTO {postgres_dtl['schema']}.{postgres_dtl['table']} "
                "(run_id, job_type, level_name, message, logger, module, function_name, filename, line_num, log_time) "
                "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (RUN_ID, 'pyStream', record.levelname, msg, record.name,
                 record.module, record.funcName, record.filename, record.lineno,
                 dt.datetime.fromtimestamp(record.created, tz=None).isoformat()),
            )
File without changes
@@ -0,0 +1,13 @@
1
from pystrm.stream.yfStream.ticksStream import getStreamData
from pystrm.utils.logger.logDecor import inOutLog


@inOutLog
def getLiveTickData(key: str) -> None:
    """Fetch live quote data from yfinance and push it to the Kafka producer.

    Bug fix: a stray ``@staticmethod`` decorator was applied to this
    module-level function; it served no purpose outside a class and
    confused the ``inOutLog`` wrapper.

    Args:
        key (str): config key ("Section.Prop") selecting the stream settings.
    """
    getStreamData(key=key)