shareddata 2.0.30__tar.gz → 2.0.32__tar.gz
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {shareddata-2.0.30 → shareddata-2.0.32}/PKG-INFO +1 -1
- {shareddata-2.0.30 → shareddata-2.0.32}/setup.cfg +1 -1
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/MultiProc.py +2 -2
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedData.py +2 -2
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataPeriod.py +5 -1
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataRealTime.py +2 -2
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTable.py +5 -5
- shareddata-2.0.30/src/SharedData/SharedDataTimeSeries.py → shareddata-2.0.32/src/SharedData/SharedDataTimeSeries copy.py +12 -12
- shareddata-2.0.32/src/SharedData/SharedDataTimeSeries.py +572 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/PKG-INFO +1 -1
- {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/SOURCES.txt +1 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/LICENSE +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/README.md +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/pyproject.toml +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Defaults.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Logger.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/LoggerConsumerProcess.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Metadata.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SeriesLib.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataAWSKinesis.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataAWSS3.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataFeeder.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataFrame.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataRealTimeProcess.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTableIndex.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTableIndexJit.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedNumpy.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/Utils.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/__init__.py +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/dependency_links.txt +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/requires.txt +0 -0
- {shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/top_level.txt +0 -0
{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/MultiProc.py

@@ -36,7 +36,7 @@ from threading import Thread
 
 
 # load all files in a directory into memory
-def io_bound(thread_func, iterator, args, maxthreads=10, maxproc=None):
+def io_bound(thread_func, iterator, args, maxproc=None, maxthreads=10):
     results = []
     # determine chunksize
     niterator = len(iterator)
@@ -67,7 +67,7 @@ def io_bound(thread_func, iterator, args, maxthreads=10, maxproc=None):
 def io_bound_process(thread_func, proc_iterator, args, maxthreads):
     results = []
     # create a thread pool
-    nthreads = len(
+    nthreads = len(proc_iterator)
     nthreads = min(nthreads,maxthreads)
     if nthreads>0:
         with ThreadPoolExecutor(nthreads) as exe:
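
The only functional change in io_bound is the order of its keyword defaults. A minimal, self-contained sketch (stub bodies, not the package's implementation) of why that reorder matters to positional callers:

def io_bound_v30(thread_func, iterator, args, maxthreads=10, maxproc=None):
    # stub that only reports how the trailing arguments were bound
    return {'maxthreads': maxthreads, 'maxproc': maxproc}

def io_bound_v32(thread_func, iterator, args, maxproc=None, maxthreads=10):
    return {'maxthreads': maxthreads, 'maxproc': maxproc}

print(io_bound_v30(None, [], (), 4))  # {'maxthreads': 4, 'maxproc': None}
print(io_bound_v32(None, [], (), 4))  # {'maxthreads': 10, 'maxproc': 4}

Callers that pass maxthreads by keyword are unaffected.
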
{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataPeriod.py

@@ -136,4 +136,8 @@ class SharedDataPeriod:
             return fpath.is_file()
         else:
             return False
-
+
+    def create_timeseries(self,tag,startDate,columns,overwrite=False):
+        self.tags[tag] = SharedDataTimeSeries(\
+            self,tag,startDate=startDate,columns=columns,overwrite=overwrite)
+        return self.tags[tag].data
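
A hedged usage sketch of the new create_timeseries method, based only on the signature above. It assumes the package is installed and a SharedData tree is initialized; shdata, 'MyFeeder' and the symbols are placeholders, not names from this diff:

import pandas as pd

period = shdata['MyFeeder']['D1']           # a SharedDataPeriod
close = period.create_timeseries(
    'close',                                # tag
    startDate=pd.Timestamp('2022-01-01'),
    columns=pd.Index(['AAPL', 'MSFT']),
    overwrite=False)                        # returns self.tags['close'].data
close.loc['2022-01-03', 'AAPL'] = 170.0     # writes into the shared-memory block
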
{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataRealTime.py

@@ -11,7 +11,7 @@ class SharedDataRealTime:
     # producer dictionary
     producer = {}
 
-    def
+    def broadcast(shdata,feeder,period,tag,idx,col):
         producer = SharedDataRealTime.getProducer(shdata)
         data = shdata[feeder][period][tag].loc[idx,col].values
 
@@ -64,7 +64,7 @@ class SharedDataRealTime:
             KinesisStreamProducer(stream_name=streamname)
         return SharedDataRealTime.producer[shdata.database]
 
-    def
+    def subscribe(shdata):
         today = pd.Timestamp(pd.Timestamp.now().date())
         wd = shdata['WATCHDOG']['D1']
         dfwatchdog = wd[today]
{shareddata-2.0.30 → shareddata-2.0.32}/src/SharedData/SharedDataTable.py

@@ -104,12 +104,12 @@ class SharedDataTable:
         self.init_time = time.time() - self.init_time
 
     def ismalloc(self):
-        path, shm_name = self.get_path(
+        path, shm_name = self.get_path()
         [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
         return ismalloc
 
     def create(self, names, formats, size):
-        path, shm_name = self.get_path(
+        path, shm_name = self.get_path()
         #TODO: WRITE FILE WITH POINTER TO SHARED DATA TO KEEP TRACK OF OPENED MEMORY
         check_pkey = True
         npkeys = len(self.keys.pkeycolumns)
@@ -400,7 +400,7 @@ class SharedDataTable:
         self.records.table = self
 
     def write(self):
-        path, shm_name = self.get_path(
+        path, shm_name = self.get_path()
 
         try:
             self.acquire()
@@ -574,7 +574,7 @@ class SharedDataTable:
 
         return rec
 
-    def get_path(self
+    def get_path(self):
         shm_name = self.sharedData.user + '/' + self.sharedData.database + '/' \
             + self.sharedDataFeeder.feeder + '/' + self.dataset
         if os.name=='posix':
@@ -609,7 +609,7 @@ class SharedDataTable:
             raise Exception('Tried to release semaphore without acquire!')
 
     def free(self):
-        path, shm_name = self.get_path(
+        path, shm_name = self.get_path()
         self.sharedData.free(shm_name)
         self.sharedData.free(shm_name+'#pkey')
         self.sharedData.free(shm_name+'#dateidx')
shareddata-2.0.30/src/SharedData/SharedDataTimeSeries.py → shareddata-2.0.32/src/SharedData/SharedDataTimeSeries copy.py

@@ -42,7 +42,7 @@ class SharedDataTimeSeries:
         self.columns = pd.Index([])
 
 
-        if value is None: #
+        if value is None: #read dataset tag
             feeder = self.sharedDataFeeder.feeder
             dataset = sharedDataPeriod.dataset
             sharedData = sharedDataPeriod.sharedData
@@ -73,9 +73,9 @@ class SharedDataTimeSeries:
             self.ctimeidx = sharedDataPeriod.getContinousTimeIndex(self.startDate)
 
             #allocate memory
-            self.isCreate = self.
+            self.isCreate = self.malloc()
             if self.isCreate:
-                self.
+                self.read()
 
         else: # map existing dataframe
             self.startDate = value.index[0]
@@ -88,7 +88,7 @@ class SharedDataTimeSeries:
 
             self.ctimeidx = self.sharedDataPeriod.getContinousTimeIndex(self.startDate)
             #allocate memory
-            isCreate = self.
+            isCreate = self.malloc(value=value)
 
         self.init_time = time.time() - self.init_time
 
@@ -192,14 +192,14 @@ class SharedDataTimeSeries:
                 i=i+1
 
     # C R U D
-    def
+    def malloc(self, value=None):
         tini=time.time()
 
         #Create write ndarray
         path, shm_name = self.getDataPath(iswrite=True)
 
         if os.environ['LOG_LEVEL']=='DEBUG':
-            Logger.log.debug('
+            Logger.log.debug('malloc %s ...%.2f%% ' % (shm_name,0.0))
 
         try: # try create memory file
             r = len(self.index)
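
The malloc restored above packs a five-field int64 header, the int64 timestamp index, a CSV of column names and a float64 matrix into a single shared-memory block, then wraps the matrix in a DataFrame without copying. A self-contained sketch of that layout, using the stdlib shared_memory in place of the package's sharedData.malloc (all names here are local to the sketch):

import numpy as np
import pandas as pd
from multiprocessing import shared_memory

index = pd.date_range('2022-01-01', periods=3, freq='D')
columns = ['AAPL', 'MSFT']
r, c = len(index), len(columns)

idx_b = index.astype(np.int64).values.tobytes()     # ns timestamps
cols_b = ','.join(columns).encode('utf-8')
header_b = np.array([r, c, len(idx_b), len(cols_b), r*c*8],
                    dtype=np.int64).tobytes()
nb_offset = len(header_b) + len(idx_b) + len(cols_b)

shm = shared_memory.SharedMemory(create=True, size=nb_offset + r*c*8)
shm.buf[:nb_offset] = header_b + idx_b + cols_b     # metadata prefix

# float64 matrix view over the tail of the buffer, NaN-initialized
arr = np.ndarray((r, c), dtype=np.float64, buffer=shm.buf, offset=nb_offset)
arr[:] = np.nan
df = pd.DataFrame(arr, index=index, columns=columns, copy=False)  # zero-copy view

arr[1, 0] = 170.0                       # any process mapping shm sees this
assert df.loc['2022-01-02', 'AAPL'] == 170.0

shm.close(); shm.unlink()               # cleanup for the sketch
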
@@ -243,7 +243,7 @@ class SharedDataTimeSeries:
                 value = self.data
 
             if os.environ['LOG_LEVEL']=='DEBUG':
-                Logger.log.debug('
+                Logger.log.debug('malloc create %s ...%.2f%% %.2f sec! ' % \
                     (shm_name,100,time.time()-tini))
             self.create_map = 'create'
             return True
@@ -284,13 +284,13 @@ class SharedDataTimeSeries:
             self.data.loc[iidx, icol] = value.loc[iidx, icol]
 
         if os.environ['LOG_LEVEL']=='DEBUG':
-            Logger.log.debug('
+            Logger.log.debug('malloc map %s/%s/%s ...%.2f%% %.2f sec! ' % \
                 (self.feeder,self.period,self.tag,100,time.time()-tini))
         self.create_map = 'map'
         return False
 
     # READ
-    def
+    def read(self):
         tini = time.time()
         path, shm_name = self.getDataPath()
         headpath = path / (self.tag+'_head.bin')
@@ -365,7 +365,7 @@ class SharedDataTimeSeries:
         data_io.close()
 
     # WRITE
-    def
+    def write(self, startDate=None):
        firstdate = self.data.first_valid_index()
        if not startDate is None:
            firstdate = startDate
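
The write entry point restored above delegates to the head/tail partitioning scheme (fully visible in the new file below): rows up to January 1 of the current year are stored as <tag>_head.bin and rows from January 1 on as <tag>_tail.bin, so routine updates only rewrite the current-year partition. A self-contained sketch of the split:

from datetime import datetime
import numpy as np
import pandas as pd

year = datetime.now().year
data = pd.DataFrame(
    np.ones((4, 1)), columns=['AAPL'],
    index=pd.to_datetime(['2021-06-30', '2021-12-31',
                          f'{year}-01-03', f'{year}-02-01']))

partdate = pd.Timestamp(datetime(year, 1, 1))
head = data.loc[:partdate]     # stable history -> tag_head.bin
tail = data.loc[partdate:]     # current year  -> tag_tail.bin
print(len(head), len(tail))    # 2 2
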
@@ -450,8 +450,8 @@ class SharedDataTimeSeries:
         os.utime(path, (mtime, mtime))
 
     # MESSAGES
-    def
-        SharedDataRealTime.
+    def broadcast(self,idx,col):
+        SharedDataRealTime.broadcast(
             self.sharedData,
             self.feeder,
             self.period,
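
The restored definitions make the delegation explicit: the timeseries forwards its own coordinates to the static SharedDataRealTime.broadcast. A self-contained stub of that call shape (print stands in for the real Kinesis producer; all classes here are sketch-only):

class RealTimeStub:
    @staticmethod
    def broadcast(shdata, feeder, period, tag, idx, col):
        # the package publishes the selected values to a Kinesis stream here
        print('broadcast', feeder, period, tag, idx, col)

class TimeSeriesStub:
    sharedData, feeder, period, tag = None, 'MyFeeder', 'D1', 'close'
    def broadcast(self, idx, col):
        RealTimeStub.broadcast(self.sharedData, self.feeder,
                               self.period, self.tag, idx, col)

TimeSeriesStub().broadcast('2022-01-03', 'AAPL')
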
shareddata-2.0.32/src/SharedData/SharedDataTimeSeries.py (new file)

@@ -0,0 +1,572 @@
+# THIRD PARTY LIBS
+import os,sys
+import pandas as pd
+import numpy as np
+import json
+import time
+from numba import jit
+from pathlib import Path
+from multiprocessing import shared_memory
+import io, gzip, hashlib, shutil
+from threading import Thread
+
+from subprocess import run, PIPE
+from datetime import datetime, timedelta
+
+from SharedData.Logger import Logger
+from SharedData.SharedDataAWSS3 import S3Upload,S3Download,UpdateModTime
+from SharedData.SharedDataRealTime import SharedDataRealTime
+
+class SharedDataTimeSeries:
+
+    def __init__(self, sharedDataPeriod, tag, value=None,\
+            startDate=None,columns=None,overwrite=False):
+        self.sharedDataPeriod = sharedDataPeriod
+        self.tag = tag
+
+        self.sharedDataFeeder = sharedDataPeriod.sharedDataFeeder
+        self.sharedData = sharedDataPeriod.sharedDataFeeder.sharedData
+
+        self.period = sharedDataPeriod.period
+        self.periodSeconds = sharedDataPeriod.periodSeconds
+        self.feeder = self.sharedDataFeeder.feeder
+
+        # test if shared memory already exists
+        if self.ismalloc():
+            self.create_map = 'map'
+        else:
+            self.create_map = 'create'
+
+        self.init_time = time.time()
+        self.download_time = pd.NaT
+        self.last_update = pd.NaT
+        self.first_update = pd.NaT
+
+        # Time series dataframe
+        self.data = pd.DataFrame()
+        self.index = pd.Index([])
+        self.columns = pd.Index([])
+
+        # initalize
+        try:
+            if ((self.create_map == 'create') | (overwrite)):
+                if (not startDate is None):
+                    # create new empty shared memory
+                    self.startDate = startDate
+                    self.columns = columns
+                    self.malloc_create()
+
+                elif (not value is None):
+                    # allocate existing data
+                    self.startDate = value.index[0]
+                    self.columns = value.columns
+                    self.malloc_create()
+                    self.setValues(value.index,value.columns,value.values)
+
+                elif (value is None):
+                    # read & allocate data
+                    tini = time.time()
+                    datasize = self.read()
+                    te = time.time()-tini+0.000001
+                    Logger.log.debug('read %s/%s %.2fMB in %.2fs %.2fMBps ' % \
+                        (self.feeder,self.tag,datasize,te,datasize/te))
+
+            elif (self.create_map == 'map'):
+                # map existing shared memory
+                self.malloc_map()
+        except Exception as e:
+            path, shm_name = self.get_path()
+            Logger.log.error('Error initalizing %s!\n%s' % (shm_name,str(e)))
+            self.free()
+
+        self.init_time = time.time() - self.init_time
+
+    def get_path(self):
+        shm_name = self.sharedData.user + '/' + self.sharedData.database + '/' \
+            + self.sharedDataFeeder.feeder + '/' + self.period + '/' + self.tag
+        if os.name=='posix':
+            shm_name = shm_name.replace('/','\\')
+
+        path = Path(os.environ['DATABASE_FOLDER'])
+        path = path / self.sharedData.user
+        path = path / self.sharedData.database
+        path = path / self.sharedDataFeeder.feeder
+        path = path / self.period
+        path = path / self.tag
+        path = Path(str(path).replace('\\','/'))
+        if self.sharedData.save_local:
+            if not os.path.isdir(path):
+                os.makedirs(path)
+
+        return path, shm_name
+
+    def ismalloc(self):
+        path, shm_name = self.get_path()
+        [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
+        return ismalloc
+
+    def malloc_create(self):
+        path, shm_name = self.get_path()
+        self.symbolidx = {}
+        for i in range(len(self.columns)):
+            self.symbolidx[self.columns.values[i]] = i
+        self.index = self.sharedDataPeriod.getTimeIndex(self.startDate)
+        self.ctimeidx = self.sharedDataPeriod.getContinousTimeIndex(self.startDate)
+        try: # try create memory file
+            r = len(self.index)
+            c = len(self.columns)
+
+            idx_b = self.index.astype(np.int64).values.tobytes()
+            colscsv_b = str.encode(','.join(self.columns.values),\
+                encoding='UTF-8',errors='ignore')
+            nb_idx = len(idx_b)
+            nb_cols = len(colscsv_b)
+            nb_data = int(r*c*8)
+            header_b = np.array([r,c,nb_idx,nb_cols,nb_data]).astype(np.int64).tobytes()
+            nb_header = len(header_b)
+
+            nb_buf = nb_header+nb_idx+nb_cols+nb_data
+            nb_offset = nb_header+nb_idx+nb_cols
+
+            [self.shm, ismalloc] = self.sharedData.malloc(shm_name,create=True,size=nb_buf)
+
+            i=0
+            self.shm.buf[i:nb_header] = header_b
+            i = i + nb_header
+            self.shm.buf[i:i+nb_idx] = idx_b
+            i = i + nb_idx
+            self.shm.buf[i:i+nb_cols] = colscsv_b
+
+            self.shmarr = np.ndarray((r,c),\
+                dtype=np.float64, buffer=self.shm.buf, offset=nb_offset)
+
+            self.shmarr[:] = np.nan
+
+            self.data = pd.DataFrame(self.shmarr,\
+                index=self.index,\
+                columns=self.columns,\
+                copy=False)
+
+            return True
+        except Exception as e:
+            Logger.log.error('Failed to malloc_create\n%s' % str(e))
+            return False
+
+    def malloc_map(self):
+        try: # try map memory file
+            path, shm_name = self.get_path()
+            [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
+
+            i=0
+            nb_header=40
+            header = np.frombuffer(self.shm.buf[i:nb_header],dtype=np.int64)
+            i = i + nb_header
+            nb_idx = header[2]
+            idx_b = bytes(self.shm.buf[i:i+nb_idx])
+            self.index = pd.to_datetime(np.frombuffer(idx_b,dtype=np.int64))
+            i = i + nb_idx
+            nb_cols = header[3]
+            cols_b = bytes(self.shm.buf[i:i+nb_cols])
+            self.columns = cols_b.decode(encoding='UTF-8',errors='ignore').split(',')
+
+            r = header[0]
+            c = header[1]
+            nb_data = header[4]
+            nb_offset = nb_header+nb_idx+nb_cols
+
+            self.shmarr = np.ndarray((r,c), dtype=np.float64,\
+                buffer=self.shm.buf, offset=nb_offset)
+
+            self.data = pd.DataFrame(self.shmarr,\
+                index=self.index,\
+                columns=self.columns,\
+                copy=False)
+
+            return True
+        except Exception as e:
+            Logger.log.error('Failed to malloc_map\n%s' % str(e))
+            return False
+
+    # READ
+    def read(self):
+        datasize = 0
+        path, shm_name = self.get_path()
+        headpath = path / (self.tag+'_head.bin')
+        tailpath = path / (self.tag+'_tail.bin')
+        head_io = None
+        tail_io = None
+        if self.sharedData.s3read:
+            force_download= (not self.sharedData.save_local)
+
+            [head_io_gzip, head_local_mtime, head_remote_mtime] = \
+                S3Download(str(headpath),str(headpath)+'.gzip',force_download)
+            if not head_io_gzip is None:
+                head_io = io.BytesIO()
+                head_io_gzip.seek(0)
+                with gzip.GzipFile(fileobj=head_io_gzip, mode='rb') as gz:
+                    shutil.copyfileobj(gz,head_io)
+                if self.sharedData.save_local:
+                    SharedDataTimeSeries.write_file(head_io,headpath,mtime=head_remote_mtime)
+                    UpdateModTime(headpath,head_remote_mtime)
+
+
+            [tail_io_gzip, tail_local_mtime, tail_remote_mtime] = \
+                S3Download(str(tailpath),str(tailpath)+'.gzip',force_download)
+            if not tail_io_gzip is None:
+                tail_io = io.BytesIO()
+                tail_io_gzip.seek(0)
+                with gzip.GzipFile(fileobj=tail_io_gzip, mode='rb') as gz:
+                    shutil.copyfileobj(gz,tail_io)
+                if self.sharedData.save_local:
+                    SharedDataTimeSeries.write_file(tail_io,tailpath,mtime=tail_remote_mtime)
+                    UpdateModTime(tailpath,tail_remote_mtime)
+
+        if (head_io is None) & (self.sharedData.save_local):
+            # read local
+            if os.path.isfile(str(headpath)):
+                head_io = open(str(headpath),'rb')
+
+        if (tail_io is None) & (self.sharedData.save_local):
+            if os.path.isfile(str(tailpath)):
+                tail_io = open(str(tailpath),'rb')
+
+        self.startDate = None
+        self.columns = pd.Index([])
+        # read index, columns
+        if not head_io is None:
+            head_io.seek(0)
+            [index,columns] = self.read_header(head_io)
+            self.columns = self.columns.union(columns)
+            self.startDate = pd.Timestamp(index.values[0])
+
+        if not tail_io is None:
+            tail_io.seek(0)
+            [index,columns] = self.read_header(tail_io)
+            self.columns = self.columns.union(columns)
+            if self.startDate==None:
+                self.startDate = pd.Timestamp(index.values[0])
+
+        if not self.startDate is None:
+            self.malloc_create()
+            # read data
+            if not head_io is None:
+                head_io.seek(0)
+                datasize+=self.read_data(head_io,headpath)
+                head_io.close()
+
+            if not tail_io is None:
+                tail_io.seek(0)
+                datasize+=self.read_data(tail_io,tailpath)
+                tail_io.close()
+
+        return datasize
+
+    def read_header(self,data_io):
+        _header = np.frombuffer(data_io.read(40),dtype=np.int64)
+        _idx_b = data_io.read(int(_header[2]))
+        _idx = pd.to_datetime(np.frombuffer(_idx_b,dtype=np.int64))
+        _colscsv_b = data_io.read(int(_header[3]))
+        _colscsv = _colscsv_b.decode(encoding='UTF-8',errors='ignore')
+        _cols = _colscsv.split(',')
+        return [_idx,_cols]
+
+    def read_data(self,data_io,path):
+        _header = np.frombuffer(data_io.read(40),dtype=np.int64)
+        _idx_b = data_io.read(int(_header[2]))
+        _idx = pd.to_datetime(np.frombuffer(_idx_b,dtype=np.int64))
+        _colscsv_b = data_io.read(int(_header[3]))
+        _colscsv = _colscsv_b.decode(encoding='UTF-8',errors='ignore')
+        _cols = _colscsv.split(',')
+        _data = np.frombuffer(data_io.read(int(_header[4])),dtype=np.float64).reshape((_header[0],_header[1]))
+        #calculate hash
+        _m = hashlib.md5(_idx_b)
+        _m.update(_colscsv_b)
+        _m.update(_data)
+        _md5hash_b = _m.digest()
+        __md5hash_b = data_io.read(16)
+        if not _md5hash_b==__md5hash_b:
+            raise Exception('Timeseries file corrupted!\n%s' % (path))
+        sidx = np.array([self.get_loc_symbol(s) for s in _cols])
+        ts = _idx.values.astype(np.int64)/10**9 #seconds
+        tidx = self.get_loc_timestamp(ts)
+        self.setValuesJit(self.data.values,tidx,sidx,_data)
+        data_io.close()
+        return _header[4]
+
+    # WRITE
+    def write(self, startDate=None):
+        firstdate = self.data.first_valid_index()
+        if not startDate is None:
+            firstdate = startDate
+        self.write_partitions(firstdate)
+
+    def write_partitions(self,firstdate):
+        tini = time.time()
+        path, shm_name = self.get_path()
+
+        partdate = pd.Timestamp(datetime(datetime.now().year,1,1))
+        threads = []
+
+        mtime = datetime.now().timestamp()
+        if firstdate<partdate:
+            # write head
+            threads = [*threads , \
+                Thread(target=SharedDataTimeSeries.write_timeseries_df,\
+                    args=(self,self.data.loc[:partdate], str(path / (self.tag+'_head.bin')), mtime) )]
+        # write tail
+        threads = [*threads , \
+            Thread(target=SharedDataTimeSeries.write_timeseries_df,\
+                args=(self,self.data.loc[partdate:], str(path / (self.tag+'_tail.bin')), mtime) )]
+
+        for i in range(len(threads)):
+            threads[i].start()
+
+        for i in range(len(threads)):
+            threads[i].join()
+
+    def write_timeseries_df(self,df,tag_path,mtime):
+        ts_io = SharedDataTimeSeries.create_timeseries_io(df)
+        threads=[]
+        if self.sharedData.s3write:
+            ts_io.seek(0)
+            gzip_io = io.BytesIO()
+            with gzip.GzipFile(fileobj=gzip_io, mode='wb', compresslevel=1) as gz:
+                shutil.copyfileobj(ts_io, gz)
+
+            threads = [*threads , \
+                Thread(target=S3Upload,args=(gzip_io, tag_path+'.gzip', mtime) )]
+
+        if self.sharedData.save_local:
+            threads = [*threads , \
+                Thread(target=SharedDataTimeSeries.write_file, args=(ts_io, tag_path, mtime) )]
+
+        for i in range(len(threads)):
+            threads[i].start()
+
+        for i in range(len(threads)):
+            threads[i].join()
+
+    def create_timeseries_io(df):
+        df = df.dropna(how='all',axis=0).dropna(how='all',axis=1)
+        r, c = df.shape
+        idx = (df.index.astype(np.int64))
+        idx_b = idx.values.tobytes()
+        cols = df.columns.values
+        colscsv = ','.join(cols)
+        colscsv_b = str.encode(colscsv,encoding='UTF-8',errors='ignore')
+        nbidx = len(idx_b)
+        nbcols = len(colscsv_b)
+        data = np.ascontiguousarray(df.values.astype(np.float64))
+        header = np.array([r,c,nbidx,nbcols,r*c*8]).astype(np.int64)
+        #calculate hash
+        m = hashlib.md5(idx_b)
+        m.update(colscsv_b)
+        m.update(data)
+        md5hash_b = m.digest()
+        # allocate memory
+        io_obj = io.BytesIO()
+        io_obj.write(header)
+        io_obj.write(idx_b)
+        io_obj.write(colscsv_b)
+        io_obj.write(data)
+        io_obj.write(md5hash_b)
+        return io_obj
+
+    def write_file(io_obj,path,mtime):
+        with open(path, 'wb') as f:
+            f.write(io_obj.getbuffer())
+            f.flush()
+        os.utime(path, (mtime, mtime))
+
+    # MESSAGES
+    def broadcast(self,idx,col):
+        SharedDataRealTime.broadcast(
+            self.sharedData,
+            self.feeder,
+            self.period,
+            self.tag,
+            idx,col)
+
+    # get / set
+    def get_loc_symbol(self, symbol):
+        if symbol in self.symbolidx.keys():
+            return self.symbolidx[symbol]
+        else:
+            return np.nan
+
+    def get_loc_timestamp(self, ts):
+        istartdate = self.startDate.timestamp() #seconds
+        if not np.isscalar(ts):
+            tidx = self.get_loc_timestamp_Jit(ts, istartdate, \
+                self.periodSeconds, self.ctimeidx)
+            return tidx
+        else:
+            tids = np.int64(ts) #seconds
+            tids = np.int64(tids - istartdate)
+            tids = np.int64(tids/self.periodSeconds)
+            if tids<self.ctimeidx.shape[0]:
+                tidx = self.ctimeidx[tids]
+                return tidx
+            else:
+                return np.nan
+
+    @staticmethod
+    @jit(nopython=True, nogil=True, cache=True)
+    def get_loc_timestamp_Jit(ts, istartdate, periodSeconds, ctimeidx):
+        tidx = np.empty(ts.shape, dtype=np.float64)
+        len_ctimeidx = len(ctimeidx)
+        for i in range(len(tidx)):
+            tid = np.int64(ts[i])
+            tid = np.int64(tid-istartdate)
+            tid = np.int64(tid/periodSeconds)
+            if tid < len_ctimeidx:
+                tidx[i] = ctimeidx[tid]
+            else:
+                tidx[i] = np.nan
+        return tidx
+
+    def getValue(self,ts,symbol):
+        sidx = self.get_loc_symbol(symbol)
+        tidx = self.get_loc_timestamp(ts)
+        if (not np.isnan(sidx)) & (not np.isnan(tidx)):
+            return self.data.values[np.int64(tidx),int(sidx)]
+        else:
+            return np.nan
+
+    def setValue(self,ts,symbol,value):
+        sidx = self.get_loc_symbol(symbol)
+        tidx = self.get_loc_timestamp(ts)
+        if (not np.isnan(sidx)) & (not np.isnan(tidx)):
+            self.data.values[np.int64(tidx),int(sidx)] = value
+
+    def setValues(self,ts,symbol,values):
+        sidx = self.get_loc_symbol(symbol)
+        tidx = self.get_loc_timestamp(ts)
+        self.setValuesSymbolJit(self.data.values, tidx, sidx, values)
+
+    @staticmethod
+    @jit(nopython=True, nogil=True, cache=True)
+    def setValuesSymbolJit(values,tidx,sidx,arr):
+        if not np.isnan(sidx):
+            s = np.int64(sidx)
+            i = 0
+            for t in tidx:
+                if not np.isnan(t):
+                    values[np.int64(t),s] = arr[i]
+                i=i+1
+
+    @staticmethod
+    @jit(nopython=True, nogil=True, cache=True)
+    def setValuesJit(values,tidx,sidx,arr):
+        i = 0
+        for t in tidx:
+            if not np.isnan(t):
+                j = 0
+                for s in sidx:
+                    if not np.isnan(s):
+                        values[np.int64(t),np.int64(s)] = arr[i,j]
+                    j=j+1
+            i=i+1
+
+
+    # C R U D
+    def malloc(self, value=None):
+        tini=time.time()
+
+        #Create write ndarray
+        path, shm_name = self.get_path()
+
+        if os.environ['LOG_LEVEL']=='DEBUG':
+            Logger.log.debug('malloc %s ...%.2f%% ' % (shm_name,0.0))
+
+        try: # try create memory file
+            r = len(self.index)
+            c = len(self.columns)
+
+            idx_b = self.index.astype(np.int64).values.tobytes()
+            colscsv_b = str.encode(','.join(self.columns.values),\
+                encoding='UTF-8',errors='ignore')
+            nb_idx = len(idx_b)
+            nb_cols = len(colscsv_b)
+            nb_data = int(r*c*8)
+            header_b = np.array([r,c,nb_idx,nb_cols,nb_data]).astype(np.int64).tobytes()
+            nb_header = len(header_b)
+
+            nb_buf = nb_header+nb_idx+nb_cols+nb_data
+            nb_offset = nb_header+nb_idx+nb_cols
+
+            [self.shm, ismalloc] = self.sharedData.malloc(shm_name,create=True,size=nb_buf)
+
+            i=0
+            self.shm.buf[i:nb_header] = header_b
+            i = i + nb_header
+            self.shm.buf[i:i+nb_idx] = idx_b
+            i = i + nb_idx
+            self.shm.buf[i:i+nb_cols] = colscsv_b
+
+            self.shmarr = np.ndarray((r,c),\
+                dtype=np.float64, buffer=self.shm.buf, offset=nb_offset)
+
+            if not value is None:
+                self.shmarr[:] = value.values.copy()
+            else:
+                self.shmarr[:] = np.nan
+
+            self.data = pd.DataFrame(self.shmarr,\
+                index=self.index,\
+                columns=self.columns,\
+                copy=False)
+
+            if not value is None:
+                value = self.data
+
+            if os.environ['LOG_LEVEL']=='DEBUG':
+                Logger.log.debug('malloc create %s ...%.2f%% %.2f sec! ' % \
+                    (shm_name,100,time.time()-tini))
+            self.create_map = 'create'
+            return True
+        except Exception as e:
+            pass
+
+        # map memory file
+        [self.shm, ismalloc] = self.sharedData.malloc(shm_name)
+
+        i=0
+        nb_header=40
+        header = np.frombuffer(self.shm.buf[i:nb_header],dtype=np.int64)
+        i = i + nb_header
+        nb_idx = header[2]
+        idx_b = bytes(self.shm.buf[i:i+nb_idx])
+        self.index = pd.to_datetime(np.frombuffer(idx_b,dtype=np.int64))
+        i = i + nb_idx
+        nb_cols = header[3]
+        cols_b = bytes(self.shm.buf[i:i+nb_cols])
+        self.columns = cols_b.decode(encoding='UTF-8',errors='ignore').split(',')
+
+        r = header[0]
+        c = header[1]
+        nb_data = header[4]
+        nb_offset = nb_header+nb_idx+nb_cols
+
+        self.shmarr = np.ndarray((r,c), dtype=np.float64,\
+            buffer=self.shm.buf, offset=nb_offset)
+
+        self.data = pd.DataFrame(self.shmarr,\
+            index=self.index,\
+            columns=self.columns,\
+            copy=False)
+
+        if not value is None:
+            iidx = value.index.intersection(self.data.index)
+            icol = value.columns.intersection(self.data.columns)
+            self.data.loc[iidx, icol] = value.loc[iidx, icol]
+
+        if os.environ['LOG_LEVEL']=='DEBUG':
+            Logger.log.debug('malloc map %s/%s/%s ...%.2f%% %.2f sec! ' % \
+                (self.feeder,self.period,self.tag,100,time.time()-tini))
+        self.create_map = 'map'
+        return False
+
+    def free(self):
+        path, shm_name = self.get_path()
+        self.sharedData.free(shm_name)
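
A self-contained round trip of the _head.bin/_tail.bin layout defined by create_timeseries_io and verified by read_data above: five int64 header fields (rows, cols, index bytes, column bytes, data bytes), then the int64 nanosecond index, the UTF-8 CSV column names, the row-major float64 data, and a 16-byte md5 over index, columns and data. Gzip/S3 handling is omitted; every name is local to the sketch:

import hashlib, io
import numpy as np
import pandas as pd

df = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]],
                  index=pd.to_datetime(['2022-01-03', '2022-01-04']),
                  columns=['AAPL', 'MSFT'])

# write (mirrors create_timeseries_io)
r, c = df.shape
idx_b = df.index.astype(np.int64).values.tobytes()
cols_b = ','.join(df.columns).encode('utf-8')
data_b = np.ascontiguousarray(df.values.astype(np.float64)).tobytes()
m = hashlib.md5(idx_b); m.update(cols_b); m.update(data_b)
buf = io.BytesIO()
buf.write(np.array([r, c, len(idx_b), len(cols_b), r*c*8],
                   dtype=np.int64).tobytes())
buf.write(idx_b); buf.write(cols_b); buf.write(data_b); buf.write(m.digest())

# read back (mirrors read_header / read_data, including the integrity check)
buf.seek(0)
h = np.frombuffer(buf.read(40), dtype=np.int64)
idx_b2 = buf.read(int(h[2]))
idx = pd.to_datetime(np.frombuffer(idx_b2, dtype=np.int64))
cols_b2 = buf.read(int(h[3]))
cols = cols_b2.decode('utf-8').split(',')
data_b2 = buf.read(int(h[4]))
vals = np.frombuffer(data_b2, dtype=np.float64).reshape(int(h[0]), int(h[1]))
m2 = hashlib.md5(idx_b2); m2.update(cols_b2); m2.update(data_b2)
if m2.digest() != buf.read(16):
    raise Exception('Timeseries file corrupted!')
assert pd.DataFrame(vals, index=idx, columns=cols).equals(df)
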
{shareddata-2.0.30 → shareddata-2.0.32}/src/shareddata.egg-info/SOURCES.txt

@@ -19,6 +19,7 @@ src/SharedData/SharedDataRealTimeProcess.py
 src/SharedData/SharedDataTable.py
 src/SharedData/SharedDataTableIndex.py
 src/SharedData/SharedDataTableIndexJit.py
+src/SharedData/SharedDataTimeSeries copy.py
 src/SharedData/SharedDataTimeSeries.py
 src/SharedData/SharedNumpy.py
 src/SharedData/Utils.py