ib-connect 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ib_connect/__init__.py +1 -0
- ib_connect/gui/__init__.py +1 -0
- ib_connect/gui/app.py +153 -0
- ib_connect/shared/__init__.py +1 -0
- ib_connect/shared/ib_connection.py +34 -0
- ib_connect/skills/__init__.py +1 -0
- ib_connect/skills/data_upload/__init__.py +1 -0
- ib_connect/skills/data_upload/data_upload.py +371 -0
- ib_connect/skills/ib_download/__init__.py +1 -0
- ib_connect/skills/ib_download/download.py +63 -0
- ib_connect/skills/ib_download/download_service.py +308 -0
- ib_connect/skills/ib_download/ib_download.py +188 -0
- ib_connect/skills/ib_download/job_queue.py +88 -0
- ib_connect/skills/ib_query/__init__.py +1 -0
- ib_connect/skills/ib_query/ib_query.py +62 -0
- ib_connect/skills/ib_query/query.py +160 -0
- ib_connect-0.2.0.dist-info/METADATA +151 -0
- ib_connect-0.2.0.dist-info/RECORD +22 -0
- ib_connect-0.2.0.dist-info/WHEEL +5 -0
- ib_connect-0.2.0.dist-info/entry_points.txt +3 -0
- ib_connect-0.2.0.dist-info/licenses/LICENSE +21 -0
- ib_connect-0.2.0.dist-info/top_level.txt +1 -0
ib_connect/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# IB Connect Package
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# GUI Package
|
ib_connect/gui/app.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
IB Data Downloader GUI - Flask Web App
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from flask import Flask, render_template, request, jsonify
|
|
7
|
+
import subprocess
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
import asyncio
|
|
12
|
+
import logging
|
|
13
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
from ib_connect.skills.ib_query.query import query_ib
|
|
18
|
+
from ib_connect.skills.ib_download.job_queue import JobQueue
|
|
19
|
+
|
|
20
|
+
# Flask application object that all routes below are registered on.
app = Flask(__name__)

# Raise the werkzeug logger threshold so only errors are emitted.
logging.getLogger('werkzeug').setLevel(logging.ERROR)

# Config
# Path of the JSON settings file, relative to the current working directory.
CONFIG_FILE = 'config.json'

# Settings used when CONFIG_FILE does not exist.
DEFAULT_CONFIG = dict(
    contracts_folder='./data/contracts',
    ib_host='127.0.0.1',
    ib_port=7497,
    ib_client_id=77,
)
|
|
31
|
+
|
|
32
|
+
def load_config():
    """Load GUI settings from CONFIG_FILE, layered over DEFAULT_CONFIG.

    Keys missing from the file fall back to their DEFAULT_CONFIG value, so
    callers can rely on every default key being present.

    Returns:
        dict: A new dictionary on every call; mutating it never alters
        DEFAULT_CONFIG.

    Raises:
        json.JSONDecodeError: If CONFIG_FILE exists but is not valid JSON.
        ValueError: If CONFIG_FILE parses to something other than an object.
    """
    # Copy so that callers mutating the result cannot corrupt the defaults.
    settings = dict(DEFAULT_CONFIG)
    try:
        with open(CONFIG_FILE, 'r') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        return settings

    if not isinstance(loaded, dict):
        raise ValueError(f"{CONFIG_FILE} must contain a JSON object")
    settings.update(loaded)
    return settings
|
|
37
|
+
|
|
38
|
+
def save_config(config):
    """Serialize *config* to CONFIG_FILE as JSON with two-space indentation.

    Args:
        config: JSON-serializable settings object to persist.
    """
    with open(CONFIG_FILE, mode='w') as handle:
        json.dump(config, handle, indent=2)
|
|
41
|
+
|
|
42
|
+
# Settings are read once at import time; the route handlers below consult this module-level object.
config = load_config()
|
|
43
|
+
|
|
44
|
+
@app.route('/')
def index():
    """Render the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page
|
|
47
|
+
|
|
48
|
+
@app.route('/query', methods=['POST'])
def query_contracts():
    """Run a contract query against IB and return the matches as JSON.

    The request body must be a JSON object; it is passed to ``query_ib``
    after the configured IB host, port and client id are written into it.

    Returns:
        A JSON response ``{'success': True, 'contracts': [...]}`` on success,
        or ``{'success': False, 'error': <message>}`` on any failure.
    """
    # silent=True yields None instead of raising on a missing/invalid body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'Request body must be a JSON object'})

    # Include configured IB params to avoid conflicts. Fallbacks come from
    # DEFAULT_CONFIG so this route agrees with the rest of the module.
    data['host'] = config.get('ib_host', DEFAULT_CONFIG['ib_host'])
    data['port'] = config.get('ib_port', DEFAULT_CONFIG['ib_port'])
    data['client_id'] = config.get('ib_client_id', DEFAULT_CONFIG['ib_client_id'])

    def run_query():
        # asyncio.run creates a fresh event loop in the worker thread.
        return asyncio.run(query_ib(data))

    try:
        with ThreadPoolExecutor(max_workers=1) as executor:
            result = executor.submit(run_query).result()

        if isinstance(result, dict) and 'error' in result:
            return jsonify({'success': False, 'error': result['error']})

        # Normalize to a list: a list stays as is, a single truthy result is
        # wrapped, anything falsy becomes an empty list.
        if isinstance(result, list):
            contracts = result
        elif result:
            contracts = [result]
        else:
            contracts = []
        contracts = [c for c in contracts if c is not None]  # Filter out nulls
        return jsonify({'success': True, 'contracts': contracts})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})
|
|
69
|
+
|
|
70
|
+
@app.route('/save_contracts', methods=['POST'])
def save_contracts():
    """Write the selected contracts to a timestamped JSON file.

    Expects a JSON object with a ``selected`` list of contract dicts. Missing
    or empty ``time_zone_id``, ``min_tick``, ``multiplier`` and ``tick_value``
    fields are filled with defaults, then empty-string and zero values are
    dropped before the list is written to the configured contracts folder.

    Returns:
        A JSON response ``{'success': True, 'file': <path>}``, or
        ``{'success': False, 'error': <message>}`` for an unusable body.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'success': False, 'error': 'Request body must be a JSON object'})
    selected = data.get('selected', [])

    # Add defaults for missing required fields
    for contract in selected:
        if not contract.get('time_zone_id'):
            contract['time_zone_id'] = 'US/Eastern'
        if not contract.get('min_tick'):
            contract['min_tick'] = 0.01
        # multiplier must be defaulted BEFORE tick_value is derived from it;
        # otherwise an empty/None multiplier breaks the multiplication.
        if not contract.get('multiplier'):
            contract['multiplier'] = 1
        if not contract.get('tick_value'):
            contract['tick_value'] = contract['min_tick'] * contract['multiplier']

    # Omit empty strings and zero values, keep None for db defaults
    cleaned_selected = [
        {k: v for k, v in contract.items() if v not in ('', 0)}
        for contract in selected
    ]

    folder = config.get('contracts_folder', DEFAULT_CONFIG['contracts_folder'])
    os.makedirs(folder, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    file_path = os.path.join(folder, f'selected_contracts_{timestamp}.json')
    with open(file_path, 'w') as f:
        json.dump(cleaned_selected, f, indent=2)
    return jsonify({'success': True, 'file': file_path})
|
|
96
|
+
|
|
97
|
+
@app.route('/download', methods=['POST'])
def download_data():
    """Queue one download job per contract in the request.

    Expects a JSON object with a ``contracts`` list. Each contract needs
    ``conid``, ``symbol``, ``start_date``, ``end_date`` and ``bar_size``;
    ``show`` defaults to ``'TRADES'``. A contract that is malformed or fails
    to submit is reported in its own entry and does not stop the others.

    Returns:
        A JSON response ``{'jobs': [...]}`` where each entry holds either
        ``job_key``/``status`` or ``error`` for the contract.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'jobs': [], 'error': 'Request body must be a JSON object'})

    contracts = data.get('contracts', [])
    queue = JobQueue(config.get('job_queue_db', './jobs.db'))

    jobs = []
    for contract in contracts:
        # Resolved up front so the error paths never index a missing key.
        label = contract.get('symbol') if isinstance(contract, dict) else None
        try:
            show = contract.get('show', 'TRADES')
            params = {
                'conid': contract['conid'],
                'start': contract['start_date'],
                'end': contract['end_date'],
                'bar_size': contract['bar_size'],
                'show': show,
                'msg': f"Download {contract['symbol']} data ({show})",
            }
            job_key = queue.submit_job(params)
        except KeyError as e:
            jobs.append({'contract': label, 'error': f'Missing required field: {e}'})
        except Exception as e:
            jobs.append({'contract': label, 'error': str(e)})
        else:
            jobs.append({'contract': label, 'job_key': job_key, 'status': 'submitted'})
    return jsonify({'jobs': jobs})
|
|
120
|
+
|
|
121
|
+
@app.route('/job_status/<job_key>')
def job_status(job_key):
    """Report the queue status of *job_key* as a human-readable string.

    Returns:
        A JSON response ``{'status': 'ok', 'details': <text>}``, or
        ``{'status': 'error', 'details': <message>}`` when the job is unknown
        or the lookup fails.
    """
    try:
        # Same fallback as download_data, so jobs submitted to the default
        # database can be found again when the setting is absent.
        job_queue_db = config.get('job_queue_db', './jobs.db')
        queue = JobQueue(job_queue_db)
        status_data = queue.get_status(job_key)
        if status_data['status'] == 'not_found':
            return jsonify({'status': 'error', 'details': 'Job not found'})

        details = f"Status: {status_data.get('status', 'unknown')}"
        if status_data.get('message'):
            details += f" - {status_data['message']}"
        if status_data.get('error'):
            details += f" - Error: {status_data['error']}"
        return jsonify({'status': 'ok', 'details': details})
    except Exception as e:
        return jsonify({'status': 'error', 'details': str(e)})
|
|
137
|
+
|
|
138
|
+
@app.route('/cancel_job/<job_key>', methods=['POST'])
def cancel_job(job_key):
    """Remove *job_key* from the queue if it is still pending.

    Returns:
        A JSON response ``{'success': True}`` when the job was removed, or
        ``{'success': False, 'error': <message>}`` when it is not pending or
        the lookup fails.
    """
    try:
        # Same fallback as download_data, so jobs submitted to the default
        # database can be cancelled when the setting is absent.
        job_queue_db = config.get('job_queue_db', './jobs.db')
        queue = JobQueue(job_queue_db)
        status_data = queue.get_status(job_key)
        if status_data['status'] != 'pending':
            return jsonify({'success': False, 'error': 'Job not pending'})
        queue.remove_job(job_key)
        return jsonify({'success': True})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})
|
|
151
|
+
|
|
152
|
+
if __name__ == '__main__':
    # Debug mode exposes the interactive Werkzeug debugger, which can execute
    # arbitrary code from the browser. It is therefore opt-in through a
    # "debug" setting instead of being always on.
    app.run(
        debug=bool(config.get('debug', False)),
        host=config.get('host', '127.0.0.1'),
        port=config.get('port', 5000),
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Shared utilities for IB Connect
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from ib_insync import IB
|
|
2
|
+
|
|
3
|
+
class IBConnection:
    """
    Connection helper for Interactive Brokers built on ib_insync.
    Hands back a connected IB instance, which can be used as a context manager.
    """

    @staticmethod
    def connect(host='127.0.0.1', port=7497, clientId=1, timeout=4.0, readonly=False, account=''):
        """
        Open a connection to IB and return the connected IB instance.

        Args:
            host (str): IB Gateway/TWS host. Default: '127.0.0.1'
            port (int): IB Gateway/TWS port. Default: 7497
            clientId (int): Client ID for the connection. Default: 1
            timeout (float): Connection timeout in seconds. Default: 4.0
            readonly (bool): Read-only connection. Default: False
            account (str): Account to use (optional). Default: ''

        Returns:
            IB: Connected IB instance (supports context manager: with ib: ...)

        Raises:
            ConnectionError: If connection fails; the underlying exception is
                attached as the cause.
        """
        session = IB()
        options = dict(
            host=host,
            port=port,
            clientId=clientId,
            timeout=timeout,
            readonly=readonly,
            account=account,
        )
        try:
            session.connect(**options)
        except Exception as exc:
            raise ConnectionError(f"Failed to connect to Interactive Brokers: {exc}") from exc
        return session
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Skills for IB Connect
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Data Upload skill
|
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Data Upload Service
|
|
4
|
+
|
|
5
|
+
Monitors folder for JSON/CSV files, processes contracts and OHLCV data, inserts to PostgreSQL.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import hashlib
|
|
12
|
+
import shutil
|
|
13
|
+
import time
|
|
14
|
+
import pandas as pd
|
|
15
|
+
import psycopg2
|
|
16
|
+
import psutil
|
|
17
|
+
from psycopg2.extras import execute_values
|
|
18
|
+
import pytz
|
|
19
|
+
from watchdog.observers import Observer
|
|
20
|
+
from watchdog.events import FileSystemEventHandler
|
|
21
|
+
|
|
22
|
+
# Every key a contract record is allowed to carry; validate_contract rejects
# records containing anything outside this set.
CONTRACT_KEYS = {
    # identity
    'conid',
    'symbol',
    'local_symbol',
    'exchange',
    'currency',
    'sec_type',
    # descriptive
    'long_name',
    'industry',
    'category',
    'sub_category',
    # pricing
    'min_tick',
    'tick_value',
    'multiplier',
    # derivatives
    'contract_month',
    'expiration_date',
    'under_conid',
    'strike',
    'right',
    # misc
    'time_zone_id',
}
|
|
28
|
+
|
|
29
|
+
def compute_conid(symbol, exchange, currency, sec_type):
    """Derive a stand-in conid for non-IB contracts from a SHA-256 hash.

    The four fields are concatenated with no separator, hashed, and the first
    seven digest bytes are read as an unsigned big-endian integer.

    Returns:
        int: A value below 2**56, which fits in a signed BIGINT.
    """
    identity = "".join(f"{part}" for part in (symbol, exchange, currency, sec_type))
    digest = hashlib.sha256(identity.encode('utf-8')).digest()
    # 7 bytes = 56 bits, which fits in a signed BIGINT.
    return int.from_bytes(digest[:7], byteorder='big', signed=False)
|
|
35
|
+
|
|
36
|
+
# Keys every contract record must provide.
REQUIRED_KEYS = {
    'symbol',
    'exchange',
    'currency',
    'sec_type',
    'min_tick',
    'tick_value',
    'multiplier',
    'time_zone_id',
}


def validate_contract(data):
    """Check a contract record against the expected schema.

    A record passes when it is a dict, contains every key in REQUIRED_KEYS,
    contains no key outside CONTRACT_KEYS, has an int-or-None ``conid`` (when
    present) and numeric ``min_tick``, ``tick_value`` and ``multiplier``.
    Each failure except the non-dict case is logged as an error.

    Returns:
        bool: True when the record is valid, False otherwise.
    """
    if not isinstance(data, dict):
        return False

    present = set(data.keys())

    # Check required keys are present
    missing = REQUIRED_KEYS - present
    if missing:
        logging.error(f"Missing required keys: {missing}")
        return False

    # Check no invalid keys
    invalid = present - CONTRACT_KEYS
    if invalid:
        logging.error(f"Invalid keys: {invalid}")
        return False

    # Basic type checks
    if 'conid' in data and not isinstance(data['conid'], (int, type(None))):
        logging.error("conid must be int or None")
        return False
    for field in ('min_tick', 'tick_value', 'multiplier'):
        if not isinstance(data[field], (int, float)):
            logging.error(f"{field} must be number")
            return False
    return True
|
|
67
|
+
|
|
68
|
+
def process_contract_file(filepath, conn, schema, table):
    """Load a contract JSON file and insert its contents.

    The file may hold one contract object or an array of them. For an array,
    every element is attempted even after an earlier one fails.

    Returns:
        bool: True only when every contract was processed successfully;
        False on any failure, including an unreadable or invalid file.
    """
    try:
        with open(filepath, 'r') as f:
            payload = json.load(f)

        if not isinstance(payload, list):
            # Single contract
            return process_single_contract(payload, conn, schema, table)

        # Array of contracts: build the full list first so no element is
        # skipped by short-circuiting.
        outcomes = [
            process_single_contract(entry, conn, schema, table)
            for entry in payload
        ]
        return all(outcomes)
    except Exception as e:
        logging.error(f"Error processing {filepath}: {e}", exc_info=True)
        return False
|
|
87
|
+
|
|
88
|
+
def normalize_tz(tz_str):
    """Convert a filename timezone token into the name handed to pytz.

    A token of the form ``us-<zone>`` (any case, exactly one hyphen) becomes
    ``US/<Zone>``; every other token is simply upper-cased.
    """
    pieces = tz_str.split('-')
    is_us_pair = len(pieces) == 2 and pieces[0].lower() == 'us'
    if not is_us_pair:
        return tz_str.upper()
    return "US/" + pieces[1].capitalize()
|
|
95
|
+
|
|
96
|
+
def parse_csv_header(filepath, barsize_tables):
    """Derive load metadata from an OHLCV CSV's filename and header row.

    The filename must be ``<symbol>.<conid>.<barsize>.<tz>.csv``, optionally
    with one extra dot-separated field before ``.csv`` that is ignored.

    Args:
        filepath: Path of the CSV file.
        barsize_tables: Mapping of bar-size token (e.g. ``'1min'``) to table name.

    Returns:
        tuple: ``(symbol, conid, table, time_col_name, sep, time_col,
        col_map, insert_cols, timezone)``.

    Raises:
        ValueError: For a non-CSV name, a malformed filename, a non-integer
            conid, an unmapped bar size, an unknown timezone, a missing time
            column, or missing open/high/low/close columns.
    """
    filename = os.path.basename(filepath)
    if not filename.endswith('.csv'):
        raise ValueError("Not a CSV file")

    fields = filename[:-4].split('.')
    if len(fields) not in (4, 5):
        raise ValueError(f"Invalid filename format: {filename}")
    # A fifth field, when present, is ignored.
    symbol, conid_str, barsize_file, tz_str = fields[:4]

    try:
        conid = int(conid_str)
    except ValueError:
        raise ValueError(f"Invalid conid in filename: {conid_str}")

    # Map barsize to table
    table = barsize_tables.get(barsize_file)
    if not table:
        raise ValueError(f"Unsupported barsize in filename: {barsize_file}")

    # Map barsize to the name of the time column in the target table,
    # falling back to 'time' for any other bar size.
    time_col_name = {'1min': 'time_1m', '1day': 'day'}.get(barsize_file, 'time')

    # Normalize timezone
    try:
        timezone = pytz.timezone(normalize_tz(tz_str))
    except pytz.exceptions.UnknownTimeZoneError:
        raise ValueError(f"Invalid timezone: {tz_str}")

    # Detect separator from the first 1024 characters: tab wins only when
    # tabs strictly outnumber commas.
    with open(filepath, 'r') as f:
        sample = f.read(1024)
    sep = '\t' if sample.count('\t') > sample.count(',') else ','

    # Read CSV header only (nrows=0)
    header = pd.read_csv(filepath, sep=sep, nrows=0)
    columns = header.columns.str.lower().tolist()

    # Find time column: first candidate present, in priority order
    time_col = None
    for candidate in ('date', 'datetime', 'time', 'timestamp'):
        if candidate in columns:
            time_col = candidate
            break
    if not time_col:
        raise ValueError(f"No time column found in {filename}")

    # Check mandatory columns
    mandatory = ['open', 'high', 'low', 'close']
    missing = [col for col in mandatory if col not in columns]
    if missing:
        raise ValueError(f"Missing mandatory columns: {missing}")

    # Map optional columns that the file actually provides
    optional = ['volume', 'trades', 'adjusted_close', 'adj_close']
    col_map = {col: col for col in mandatory + [time_col]}
    col_map.update({opt: opt for opt in optional if opt in columns})

    insert_cols = ['conid', 'symbol', time_col_name] + mandatory
    insert_cols += [opt for opt in optional if opt in col_map]

    return symbol, conid, table, time_col_name, sep, time_col, col_map, insert_cols, timezone
|
|
167
|
+
|
|
168
|
+
def process_csv_data(filepath, sep, time_col, time_col_name, col_map, table, symbol, conid, timezone):
    """Read an OHLCV CSV into a DataFrame shaped for insertion.

    Column names are lower-cased, the CSV's time column is renamed to
    *time_col_name*, constant ``conid`` and ``symbol`` columns are added, and
    timestamps are localized to *timezone* (or converted when already
    tz-aware). For the ``'ohlcv_1d'`` table, ``volume`` and ``trades`` are
    cast to int when present.

    Returns:
        pandas.DataFrame: The prepared frame.
    """
    frame = pd.read_csv(filepath, sep=sep)
    frame.columns = frame.columns.str.lower()

    # Rename columns (col_map is inverted; an entry keyed 'time' is skipped)
    renames = {source: target for target, source in col_map.items() if target != 'time'}
    renames[time_col] = time_col_name
    if 'adj_close' in renames:
        renames['adjusted_close'] = 'adj_close'
    frame = frame.rename(columns=renames)

    # Add conid and symbol
    frame['conid'] = conid
    frame['symbol'] = symbol

    # Localize time: naive timestamps are stamped with the zone, aware ones
    # are converted into it.
    stamps = pd.to_datetime(frame[time_col_name])
    if stamps.dt.tz is None:
        stamps = stamps.dt.tz_localize(timezone)
    else:
        stamps = stamps.dt.tz_convert(timezone)
    frame[time_col_name] = stamps

    # Cast types
    if table == 'ohlcv_1d':
        for counter in ('volume', 'trades'):
            if counter in frame.columns:
                frame[counter] = frame[counter].astype(int)

    return frame
|
|
199
|
+
|
|
200
|
+
def insert_ohlcv_data(df, conn, schema, table, insert_cols, time_col_name):
    """Bulk-insert OHLCV rows, skipping rows that already exist.

    Rows are sent in batches of 1000 with ``ON CONFLICT (conid, <time
    column>) DO NOTHING`` and committed once at the end. If any batch fails,
    the transaction is rolled back so the shared connection stays usable for
    the next file, and the exception is re-raised.

    Args:
        df: DataFrame containing at least the columns in *insert_cols*.
        conn: Open psycopg2 connection.
        schema: Target schema name.
        table: Target table name.
        insert_cols: Ordered column names to insert.
        time_col_name: Name of the time column used in the conflict target.
    """
    quoted_cols = [f'"{col}"' for col in insert_cols]
    # Quoted like the insert columns so both lists follow the same rule.
    conflict_cols = ['"conid"', f'"{time_col_name}"']
    query = (
        f"INSERT INTO {schema}.{table} ({', '.join(quoted_cols)}) VALUES %s "
        f"ON CONFLICT ({', '.join(conflict_cols)}) DO NOTHING"
    )

    batch_size = 1000
    rows = df[insert_cols]  # select once instead of per batch
    try:
        with conn.cursor() as cur:
            for start in range(0, len(rows), batch_size):
                batch = rows.iloc[start:start + batch_size]
                values = [tuple(row) for row in batch.values]
                execute_values(cur, query, values)
        conn.commit()
    except Exception:
        # Without a rollback the connection stays in an aborted transaction
        # and every later statement on it fails.
        conn.rollback()
        raise
|
|
213
|
+
|
|
214
|
+
def process_single_contract(data, conn, schema, table):
    """Validate one contract dict and insert it into the contracts table.

    When ``conid`` is absent or None it is computed with ``compute_conid``
    and written into *data*. The insert uses ``ON CONFLICT (conid) DO
    NOTHING``. On any failure the transaction is rolled back so the shared
    connection stays usable for later records.

    Returns:
        bool: True when the statement was executed and committed, False when
        validation or the insert failed.
    """
    # Resolved up front so the error paths never call .get on a non-dict.
    label = data.get('symbol', 'unknown') if isinstance(data, dict) else 'unknown'
    try:
        if not validate_contract(data):
            logging.error(f"Validation failed for {label}")
            return False

        # Compute conid if missing
        if data.get('conid') is None:
            data['conid'] = compute_conid(data['symbol'], data['exchange'], data['currency'], data['sec_type'])

        # Insert to DB
        columns = list(data.keys())
        values = [data[k] for k in columns]
        placeholders = ', '.join(['%s'] * len(columns))
        quoted_columns = [f'"{col}"' for col in columns]
        query = f"INSERT INTO {schema}.{table} ({', '.join(quoted_columns)}) VALUES ({placeholders}) ON CONFLICT (conid) DO NOTHING"
        logging.debug(f"Executing query: {query} with values: {values}")
        with conn.cursor() as cur:
            cur.execute(query, values)
        conn.commit()
        logging.info(f"Inserted contract {json.dumps(data)}")
        return True
    except Exception as e:
        logging.error(f"Error inserting contract {label}: {e}", exc_info=True)
        # A failed statement leaves the transaction aborted; roll back so the
        # next contract or file can still use this connection.
        try:
            conn.rollback()
        except Exception as rollback_error:
            logging.error(f"Rollback failed after contract error: {rollback_error}")
        return False
|
|
240
|
+
|
|
241
|
+
def process_ohlcv_file(filepath, conn, schema, barsize_tables):
    """Load one OHLCV CSV file into its bar-size table.

    Chains ``parse_csv_header``, ``process_csv_data`` and
    ``insert_ohlcv_data``; any exception is logged with its traceback.

    Returns:
        bool: True when the file was inserted, False on any error.
    """
    try:
        header = parse_csv_header(filepath, barsize_tables)
        (symbol, conid, table, time_col_name, sep,
         time_col, col_map, insert_cols, timezone) = header
        bars = process_csv_data(
            filepath, sep, time_col, time_col_name, col_map,
            table, symbol, conid, timezone,
        )
        insert_ohlcv_data(bars, conn, schema, table, insert_cols, time_col_name)
        logging.info(f"Inserted {len(bars)} OHLCV bars for {symbol} into {table}")
        return True
    except Exception as e:
        logging.error(f"Error processing OHLCV file {filepath}: {e}", exc_info=True)
        return False
|
|
252
|
+
|
|
253
|
+
class DataFileHandler(FileSystemEventHandler):
    """Watchdog handler that loads new .json/.csv files into the database.

    A file that loads successfully is moved to the processed folder; any
    other file is moved to the error folder.
    """

    def __init__(self, input_folder, processed_folder, error_folder, conn, config):
        """Store folders, connection and table names; create output folders.

        Args:
            input_folder: Folder being watched.
            processed_folder: Destination for successfully loaded files.
            error_folder: Destination for files that failed to load.
            conn: Open database connection shared by all files.
            config: Settings dict; ``schema``, ``contract_table``,
                ``ohlcv_1m_table`` and ``ohlcv_1d_table`` are optional.
        """
        self.input_folder = input_folder
        self.processed_folder = processed_folder
        self.error_folder = error_folder
        self.conn = conn
        self.schema = config.get('schema', 'finance')
        self.contract_table = config.get('contract_table', 'contracts')
        self.barsize_tables = {
            '1min': config.get('ohlcv_1m_table', 'ohlcv_1m'),
            '1day': config.get('ohlcv_1d_table', 'ohlcv_1d')
        }
        os.makedirs(self.processed_folder, exist_ok=True)
        os.makedirs(self.error_folder, exist_ok=True)

    def on_created(self, event):
        """Process a newly created .json (contracts) or .csv (OHLCV) file."""
        if event.is_directory:
            return
        filepath = event.src_path
        filename = os.path.basename(filepath)
        if not filename.endswith(('.json', '.csv')):
            return

        try:
            if filename.endswith('.json'):
                # Honor the configured contract table instead of a literal.
                success = process_contract_file(filepath, self.conn, self.schema, self.contract_table)
            else:
                success = process_ohlcv_file(filepath, self.conn, self.schema, self.barsize_tables)

            if success:
                shutil.move(filepath, os.path.join(self.processed_folder, filename))
                logging.info(f"Processed and moved {filename} to processed")
            else:
                shutil.move(filepath, os.path.join(self.error_folder, filename))
                logging.error(f"Moved {filename} to errors")
        except Exception as e:
            logging.error(f"Unexpected error processing {filename}: {e}", exc_info=True)
            # Move to errors if not already. This stays best effort, but a
            # failed move is now logged instead of silently swallowed.
            try:
                shutil.move(filepath, os.path.join(self.error_folder, filename))
            except OSError as move_error:
                logging.warning(f"Could not move {filename} to errors: {move_error}")
|
|
301
|
+
|
|
302
|
+
def _lock_owner_alive(lock_file):
    """Return True when the PID stored in *lock_file* is a live python process."""
    try:
        with open(lock_file, 'r') as f:
            pid_int = int(f.read().strip())
        return 'python' in psutil.Process(pid_int).name().lower()
    except (ValueError, OSError, psutil.Error):
        # Unreadable lock, garbage PID, vanished process or a process we may
        # not inspect: treat the lock as stale.
        return False


def main():
    """Run the data upload service until interrupted.

    Takes a PID lock file next to this module, reads ``config.json`` from the
    working directory, connects to PostgreSQL (``PG_URI`` environment variable
    or the ``db_uri`` setting), processes files already in the input folder,
    then watches that folder for new ``.json``/``.csv`` files. The observer,
    the connection and the lock file are released on every exit path.
    """
    lock_file = os.path.join(os.path.dirname(__file__), 'service.lock')
    if os.path.exists(lock_file) and _lock_owner_alive(lock_file):
        logging.error("Another instance is running")
        return
    with open(lock_file, 'w') as f:
        f.write(str(os.getpid()))

    conn = None
    observer = None
    observer_started = False
    try:
        with open('config.json', 'r') as f:
            config = json.load(f)

        input_folder = config['input_folder']
        processed_folder = config['processed_folder']
        error_folder = config['error_folder']
        db_uri = os.environ.get('PG_URI', os.path.expandvars(config.get('db_uri', '')))

        if not db_uri:
            print("No DB URI provided")
            return

        logging.basicConfig(
            filename='data_upload.log',
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )

        try:
            conn = psycopg2.connect(db_uri)
            logging.info("Connected to DB")
        except Exception as e:
            logging.error(f"DB connection failed: {e}")
            return

        # Set up file watcher
        event_handler = DataFileHandler(input_folder, processed_folder, error_folder, conn, config)
        observer = Observer()
        observer.schedule(event_handler, input_folder, recursive=False)

        observer.start()
        observer_started = True
        logging.info("Data upload service started successfully - monitoring input folder for .json and .csv files")

        # Process any existing files on startup by feeding the handler a
        # minimal stand-in event carrying the two attributes it reads.
        for filename in os.listdir(input_folder):
            filepath = os.path.join(input_folder, filename)
            if os.path.isfile(filepath) and filename.endswith(('.json', '.csv')):
                event_handler.on_created(type('Event', (), {'is_directory': False, 'src_path': filepath})())

        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        # Runs on every exit path, not only on Ctrl-C.
        if observer_started:
            observer.stop()
            observer.join()
        if conn is not None:
            conn.close()
        if os.path.exists(lock_file):
            os.remove(lock_file)


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# IB Download Skill
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
IB Download Module
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .job_queue import JobQueue
|
|
7
|
+
|
|
8
|
+
def submit_download_job(params_dict):
    """
    Queue a download job described by a plain dict.

    The dict entries become attributes of an argument object, job-level
    defaults are filled in, and the result is handed to submit_single_job.
    Returns job_key.
    """
    queue = JobQueue()

    # Convert dict to namespace
    class Args:
        pass

    args = Args()
    for name, value in params_dict.items():
        setattr(args, name, value)

    # Set defaults for job params (not connection params — service uses its own config).
    # 'show' is replaced when absent OR falsy; the others only when absent.
    if not getattr(args, 'show', None):
        args.show = 'TRADES'
    fallbacks = (
        ('timeout', 4.0),
        ('max_retries', 3),
        ('use_rth', False),
        ('format', 'ib'),
    )
    for name, fallback in fallbacks:
        if not hasattr(args, name):
            setattr(args, name, fallback)

    return submit_single_job(queue, args)
|
|
35
|
+
|
|
36
|
+
def submit_single_job(queue, args):
    """Build the job parameter dict from *args* and submit it to *queue*.

    Args:
        queue: Object exposing ``submit_job(params)``.
        args: Object carrying ``conid``, ``start``, ``end``, ``bar_size``,
            ``show``, ``timeout``, ``max_retries``, ``use_rth`` and ``format``
            attributes. ``agent`` and ``msg`` are optional and default to
            None; ``host``, ``port`` and ``client_id`` are included only when
            present.

    Returns:
        The job key returned by ``queue.submit_job``.
    """
    params = {
        'conid': args.conid,
        'start': args.start,
        'end': args.end,
        'bar_size': args.bar_size,
        'show': args.show,
        'timeout': args.timeout,
        'max_retries': args.max_retries,
        'use_rth': args.use_rth,
        'format': args.format,
        # submit_download_job does not default these two, so a plain dict
        # without them must not raise AttributeError here.
        'agent': getattr(args, 'agent', None),
        'msg': getattr(args, 'msg', None),
    }
    # Only store connection params if explicitly provided — absent means service uses its own config
    for attr in ('host', 'port', 'client_id'):
        if hasattr(args, attr):
            params[attr] = getattr(args, attr)
    return queue.submit_job(params)
|
|
56
|
+
|
|
57
|
+
def get_job_status(job_key):
    """
    Look up a job in a freshly constructed JobQueue.
    Returns the dict produced by the queue's get_status.
    """
    return JobQueue().get_status(job_key)
|